From bbe0716a10bb34bdb05652c56519bb4a5985c67e Mon Sep 17 00:00:00 2001 From: Joel Krauska Date: Thu, 23 May 2019 23:39:20 +0000 Subject: [PATCH 01/10] cuda 10 support and multi-arch gpu --- .gitignore | 4 ++++ Makefile | 40 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 63fc1fd..45762a7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,10 @@ tests/sub_* tests/mul_* tests/modexp_* tests/paillier_* +tests/test-suite + +# Benchmark +bench/bench # Object files *.o diff --git a/Makefile b/Makefile index 6136203..ce7342f 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,4 @@ CXX ?= g++ -GENCODES ?= 50 - INCLUDE_DIRS = -I./src NVCC_FLAGS = -ccbin $(CXX) -std=c++11 -Xcompiler -Wall,-Wextra NVCC_OPT_FLAGS = -DNDEBUG @@ -9,17 +7,51 @@ NVCC_DBG_FLAGS = -g -G NVCC_LIBS = -lstdc++ NVCC_TEST_LIBS = -lgtest +# SOURCE: https://docs.nvidia.com/cuda/turing-compatibility-guide/index.html#building-turing-compatible-apps-using-cuda-10-0 +# NOTE: Supports 6 different chipsets, but results in binary sizes +# 7x larger (21MB:3MB) and build times 7X slower (80s:12s). +# DECODE: +# 50==Maxwell:Tesla/Quadro, 52==Maxewll,GTX 9**, 60==Pascal:Tesla P100 +# 61==Pascal:GTX 10**, 70==Volta:Tesla V100, 75==Turing:RTX 20** + +NVCC_TURING_COMPAT_MODE = -arch=sm_50 \ +-gencode=arch=compute_50,code=sm_50 \ +-gencode=arch=compute_52,code=sm_52 \ +-gencode=arch=compute_60,code=sm_60 \ +-gencode=arch=compute_61,code=sm_61 \ +-gencode=arch=compute_70,code=sm_70 \ +-gencode=arch=compute_75,code=sm_75 \ +-gencode=arch=compute_75,code=compute_75 + + +# Check that nvcc is in path +ifeq (, $(shell which nvcc)) + $(error "No nvcc in $$PATH, consider doing: export PATH=$$PATH:/usr/local/cuda/bin") +endif + + all: @echo "Please run 'make check' or 'make bench'." 
tests/test-suite: tests/test-suite.cu - nvcc $(NVCC_TEST_FLAGS) $(NVCC_FLAGS) $(GENCODES:%=--gpu-architecture=compute_%) $(GENCODES:%=--gpu-code=sm_%) $(INCLUDE_DIRS) $(NVCC_LIBS) $(NVCC_TEST_LIBS) -o $@ $< + nvcc $(NVCC_TEST_FLAGS) \ + $(NVCC_FLAGS) \ + $(NVCC_TURING_COMPAT_MODE) \ + $(INCLUDE_DIRS) \ + $(NVCC_LIBS) \ + $(NVCC_TEST_LIBS) \ + -o $@ $< check: tests/test-suite @./tests/test-suite bench/bench: bench/bench.cu - nvcc $(NVCC_OPT_FLAGS) $(NVCC_FLAGS) $(GENCODES:%=--gpu-architecture=compute_%) $(GENCODES:%=--gpu-code=sm_%) $(INCLUDE_DIRS) $(NVCC_LIBS) -o $@ $< + nvcc $(NVCC_OPT_FLAGS) \ + $(NVCC_FLAGS) \ + $(NVCC_TURING_COMPAT_MODE) \ + $(INCLUDE_DIRS) \ + $(NVCC_LIBS) \ + -o $@ $< bench: bench/bench From 72be12ca94887b62cc737442b42eeee2ee09e8f8 Mon Sep 17 00:00:00 2001 From: Joel Krauska Date: Thu, 30 May 2019 01:21:08 +0000 Subject: [PATCH 02/10] cmake --- .gitignore | 12 ++-------- CMakeLists.txt | 31 ++++++++++++++++++++++++++ Makefile | 60 -------------------------------------------------- README.md | 15 ++++++++++--- 4 files changed, 45 insertions(+), 73 deletions(-) create mode 100644 CMakeLists.txt delete mode 100644 Makefile diff --git a/.gitignore b/.gitignore index 45762a7..f5f17f6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,5 @@ -# Test input -tests/add_* -tests/sub_* -tests/mul_* -tests/modexp_* -tests/paillier_* -tests/test-suite - -# Benchmark -bench/bench +# Build +build # Object files *.o diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..bfd8120 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,31 @@ + +CMAKE_MINIMUM_REQUIRED(VERSION 3.8) +project(cuda-fixnum CXX CUDA) + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CUDA_STANDARD 11) + +SET(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/bin") + +# CUDA +find_package(CUDA REQUIRED) +set(CUDA_ARCH_LIST Auto CACHE LIST + "List of CUDA architectures (e.g. Pascal, Volta, etc) or \ + compute capability versions (6.1, 7.0, etc) to generate code for. 
\ + Set to Auto for automatic detection (default)." +) +cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS ${CUDA_ARCH_LIST}) +string(REPLACE ";" " " CUDA_ARCH_FLAGS_SPACES "${CUDA_ARCH_FLAGS}") + +string(APPEND CMAKE_CUDA_FLAGS " ${CUDA_ARCH_FLAGS_SPACES}") +string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler -Wall,-Wextra") +set(CMAKE_CUDA_FLAGS_DEBUG "-g -G") +add_definitions(-DNDEBUG) + +# CODE +include_directories(src) +add_executable(bench bench/bench.cu) +add_executable(check tests/test-suite.cu) + +target_link_libraries(bench -lstdc++) +target_link_libraries(check -lstdc++ -lgtest) diff --git a/Makefile b/Makefile deleted file mode 100644 index ce7342f..0000000 --- a/Makefile +++ /dev/null @@ -1,60 +0,0 @@ -CXX ?= g++ -INCLUDE_DIRS = -I./src -NVCC_FLAGS = -ccbin $(CXX) -std=c++11 -Xcompiler -Wall,-Wextra -NVCC_OPT_FLAGS = -DNDEBUG -NVCC_TEST_FLAGS = -lineinfo -NVCC_DBG_FLAGS = -g -G -NVCC_LIBS = -lstdc++ -NVCC_TEST_LIBS = -lgtest - -# SOURCE: https://docs.nvidia.com/cuda/turing-compatibility-guide/index.html#building-turing-compatible-apps-using-cuda-10-0 -# NOTE: Supports 6 different chipsets, but results in binary sizes -# 7x larger (21MB:3MB) and build times 7X slower (80s:12s). -# DECODE: -# 50==Maxwell:Tesla/Quadro, 52==Maxewll,GTX 9**, 60==Pascal:Tesla P100 -# 61==Pascal:GTX 10**, 70==Volta:Tesla V100, 75==Turing:RTX 20** - -NVCC_TURING_COMPAT_MODE = -arch=sm_50 \ --gencode=arch=compute_50,code=sm_50 \ --gencode=arch=compute_52,code=sm_52 \ --gencode=arch=compute_60,code=sm_60 \ --gencode=arch=compute_61,code=sm_61 \ --gencode=arch=compute_70,code=sm_70 \ --gencode=arch=compute_75,code=sm_75 \ --gencode=arch=compute_75,code=compute_75 - - -# Check that nvcc is in path -ifeq (, $(shell which nvcc)) - $(error "No nvcc in $$PATH, consider doing: export PATH=$$PATH:/usr/local/cuda/bin") -endif - - -all: - @echo "Please run 'make check' or 'make bench'." 
- -tests/test-suite: tests/test-suite.cu - nvcc $(NVCC_TEST_FLAGS) \ - $(NVCC_FLAGS) \ - $(NVCC_TURING_COMPAT_MODE) \ - $(INCLUDE_DIRS) \ - $(NVCC_LIBS) \ - $(NVCC_TEST_LIBS) \ - -o $@ $< - -check: tests/test-suite - @./tests/test-suite - -bench/bench: bench/bench.cu - nvcc $(NVCC_OPT_FLAGS) \ - $(NVCC_FLAGS) \ - $(NVCC_TURING_COMPAT_MODE) \ - $(INCLUDE_DIRS) \ - $(NVCC_LIBS) \ - -o $@ $< - -bench: bench/bench - -.PHONY: clean -clean: - $(RM) tests/test-suite bench/bench diff --git a/README.md b/README.md index b304b17..82d803e 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ The primary use case for fast arithmetic of numbers in the range covered by `cuda-fixnum` is in cryptography and computational number theory; in particular it can form an integral part in accelerating homomorphic encryption primitives as used in privacy-preserving machine learning. As such, special attention is given to support modular arithmetic; this is used in an example implementation of the Paillier additively homomorphic encryption scheme and of elliptic curve scalar multiplication. Future releases will provide additional support for operations useful to implementing Ring-LWE-based somewhat homomorphic encryption schemes. -Finally, the library is designed to be _fast_. Through exploitation of warp-synchronous programming, vote functions, and deferred carry handling, the primitives of the library are currently competitive with the state-of-the-art in the literature for modular multiplication and modular exponentiation on GPUs. The design of the library allows transparent substitution of the underlying arithmetic, allowing the user to select whichever performs best on the available hardware. Moreover, several algorithms, both novel and from the literature, will be incorporated shortly that will improve performance by a further 25-50%. +Finally, the library is designed to be _fast_. 
Through exploitation of warp-synchronous programming, vote functions, and deferred carry handling, the primitives of the library are currently competitive with the state-of-the-art in the literature for modular multiplication and modular exponentiation on GPUs. The design of the library allows transparent substitution of the underlying arithmetic, allowing the user to select whichever performs best on the available hardware. Moreover, several algorithms, both novel and from the literature, will be incorporated shortly that will improve performance by a further 25-50%. The library is currently at the _alpha_ stage of development. It has many rough edges, but most features are present and it is performant enough to be competitive. Comments, questions and contributions are welcome! @@ -126,14 +126,23 @@ void host_function() { ## Building -The build system for cuda-fixnum is currently, shall we say, _primitive_. Basically you can run `make bench` to build the benchmarking program, or `make check` to build and run the test suite. The test suite requires the [Google Test framework](https://github.com/google/googletest) to be installed. The Makefile will read in the variables `CXX` and `GENCODES` from the environment as a convenient way to specify the C++ compiler to use and the Cuda compute capability codes that you want to compile with. The defaults are `CXX = g++` and `GENCODES = 50`. +``` +mkdir build +cd build +cmake .. +make bench +``` + +The build system for cuda-fixnum use CMake. Create a working directory to build in and run cmake from there. Then you can run `make bench` to build the benchmarking program, or `make check` to build and run the test suite. The test suite requires the [Google Test framework](https://github.com/google/googletest) to be installed. + +CMake attempts to detect and set the proper flags for you GPU architecture, but you can override them or cross compile if needed. 
## Benchmarks Here is the output from a recent run of the benchmark with a GTX Titan X (Maxwell, 1GHz clock, 3072 cores): ``` -$ bench/bench 5000000 +$ bin/bench 5000000 Function: mul_lo, #elts: 5000e3 fixnum digit total data time Kops/s bits bits (MiB) (seconds) From 34e0f05a31449c7629a429680abcd88fd1e5c19c Mon Sep 17 00:00:00 2001 From: Joel Krauska Date: Thu, 30 May 2019 22:24:54 +0000 Subject: [PATCH 03/10] NDEBUG only for test-suite and check for Gtest --- CMakeLists.txt | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bfd8120..535746f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,12 +20,25 @@ string(REPLACE ";" " " CUDA_ARCH_FLAGS_SPACES "${CUDA_ARCH_FLAGS}") string(APPEND CMAKE_CUDA_FLAGS " ${CUDA_ARCH_FLAGS_SPACES}") string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler -Wall,-Wextra") set(CMAKE_CUDA_FLAGS_DEBUG "-g -G") -add_definitions(-DNDEBUG) -# CODE +# source include_directories(src) -add_executable(bench bench/bench.cu) -add_executable(check tests/test-suite.cu) +# bench +add_executable(bench bench/bench.cu) target_link_libraries(bench -lstdc++) -target_link_libraries(check -lstdc++ -lgtest) + +# test-suite +find_package(GTest) +if(GTEST_FOUND) + add_executable(check tests/test-suite.cu) + target_link_libraries(check -lstdc++ -lgtest) + target_compile_definitions(check PUBLIC -DNDEBUG) +else() + message(WARNING "GTest libraries not found - not building test-suite") +endif() + +# FIXME: Add test runs +# enable_testing() +# add_test(NAME mytest +# COMMAND mytest) From 74243d5930286525072a8783d687252afe1ccbe3 Mon Sep 17 00:00:00 2001 From: Joel Krauska Date: Thu, 30 May 2019 22:36:11 +0000 Subject: [PATCH 04/10] -lineinfo --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 535746f..efa6abc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,7 @@ if(GTEST_FOUND) add_executable(check tests/test-suite.cu) 
target_link_libraries(check -lstdc++ -lgtest) target_compile_definitions(check PUBLIC -DNDEBUG) + target_compile_options(check PUBLIC -lineinfo) else() message(WARNING "GTest libraries not found - not building test-suite") endif() From 46a0db806fe7a3dbc75e4c99ea186236a1c46172 Mon Sep 17 00:00:00 2001 From: Joel Krauska Date: Fri, 31 May 2019 01:17:55 +0000 Subject: [PATCH 05/10] move NDEBUG --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index efa6abc..4d422c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ include_directories(src) # bench add_executable(bench bench/bench.cu) +target_compile_definitions(bench PUBLIC -DNDEBUG) target_link_libraries(bench -lstdc++) # test-suite @@ -33,7 +34,6 @@ find_package(GTest) if(GTEST_FOUND) add_executable(check tests/test-suite.cu) target_link_libraries(check -lstdc++ -lgtest) - target_compile_definitions(check PUBLIC -DNDEBUG) target_compile_options(check PUBLIC -lineinfo) else() message(WARNING "GTest libraries not found - not building test-suite") From ca4165daea370540491d14d0a8c4bdd6b7da737b Mon Sep 17 00:00:00 2001 From: Joel Krauska Date: Fri, 31 May 2019 01:21:04 +0000 Subject: [PATCH 06/10] readd tests --- .gitignore | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.gitignore b/.gitignore index f5f17f6..dff9012 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,13 @@ # Build build +# Test input +tests/add_* +tests/sub_* +tests/mul_* +tests/modexp_* +tests/paillier_* + # Object files *.o *.ko From 5f7a92120304694ed554a6005d96b6972da778ed Mon Sep 17 00:00:00 2001 From: Joel Krauska Date: Fri, 31 May 2019 01:21:58 +0000 Subject: [PATCH 07/10] fix whitespace --- .gitignore | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index dff9012..cc43593 100644 --- a/.gitignore +++ b/.gitignore @@ -3,9 +3,9 @@ build # Test input tests/add_* -tests/sub_* -tests/mul_* -tests/modexp_* 
+tests/sub_* +tests/mul_* +tests/modexp_* tests/paillier_* # Object files From 5778c89df068b62059c7024806a4f588d1349309 Mon Sep 17 00:00:00 2001 From: Joel Krauska Date: Fri, 31 May 2019 01:23:25 +0000 Subject: [PATCH 08/10] typo, doesn't run --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 82d803e..fe77955 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,9 @@ cmake .. make bench ``` -The build system for cuda-fixnum use CMake. Create a working directory to build in and run cmake from there. Then you can run `make bench` to build the benchmarking program, or `make check` to build and run the test suite. The test suite requires the [Google Test framework](https://github.com/google/googletest) to be installed. +The build system for cuda-fixnum uses CMake. Create a working directory to build in and run cmake from there. +Then you can run `make bench` to build the benchmarking program, or `make check` to build the test suite. +The test suite requires the [Google Test framework](https://github.com/google/googletest) to be installed. CMake attempts to detect and set the proper flags for you GPU architecture, but you can override them or cross compile if needed. 
From 5d2dfcc978d0108602e66b8b332e6e25055395bf Mon Sep 17 00:00:00 2001 From: Joel Krauska Date: Mon, 3 Jun 2019 04:29:12 +0000 Subject: [PATCH 09/10] GTEST DIR --- CMakeLists.txt | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d422c6..3d538a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,11 +1,9 @@ - -CMAKE_MINIMUM_REQUIRED(VERSION 3.8) +cmake_minimum_required(VERSION 3.8) project(cuda-fixnum CXX CUDA) set(CMAKE_CXX_STANDARD 14) set(CMAKE_CUDA_STANDARD 11) - -SET(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/bin") +set(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/bin") # CUDA find_package(CUDA REQUIRED) @@ -33,7 +31,7 @@ target_link_libraries(bench -lstdc++) find_package(GTest) if(GTEST_FOUND) add_executable(check tests/test-suite.cu) - target_link_libraries(check -lstdc++ -lgtest) + target_link_libraries(check -lstdc++ -L${GTEST_INCLUDE_DIR} -lgtest) target_compile_options(check PUBLIC -lineinfo) else() message(WARNING "GTest libraries not found - not building test-suite") @@ -43,3 +41,9 @@ endif() # enable_testing() # add_test(NAME mytest # COMMAND mytest) + +get_cmake_property(_variableNames VARIABLES) +list (SORT _variableNames) +foreach (_variableName ${_variableNames}) + message(STATUS "${_variableName}=${${_variableName}}") +endforeach() From 1224be0810be87b335a2458c26d4323d40fe02ad Mon Sep 17 00:00:00 2001 From: Joel Krauska Date: Mon, 3 Jun 2019 04:30:31 +0000 Subject: [PATCH 10/10] comment out debug --- CMakeLists.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d538a5..e8c4c2d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,8 +42,9 @@ endif() # add_test(NAME mytest # COMMAND mytest) -get_cmake_property(_variableNames VARIABLES) -list (SORT _variableNames) -foreach (_variableName ${_variableNames}) - message(STATUS "${_variableName}=${${_variableName}}") -endforeach() +# DEBUG CMAKE Variables 
+#get_cmake_property(_variableNames VARIABLES) +#list (SORT _variableNames) +#foreach (_variableName ${_variableNames}) +# message(STATUS "${_variableName}=${${_variableName}}") +#endforeach()