Skip to content

Commit

Permalink
Move x86 CPUID code from cpuid.hpp to cpuid.cpp (#73)
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch authored Jun 22, 2024
1 parent f4f3c91 commit ae31111
Show file tree
Hide file tree
Showing 18 changed files with 268 additions and 246 deletions.
42 changes: 24 additions & 18 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ if(NOT isMultiConfig AND NOT CMAKE_BUILD_TYPE)
endif()

if(CMAKE_BUILD_TYPE STREQUAL "Debug")
set(ENABLE_ASSERT "ENABLE_ASSERT")
list(APPEND PRIMECOUNT_COMPILE_DEFINITIONS "ENABLE_ASSERT")
endif()

# primecount binary source files #####################################
Expand Down Expand Up @@ -154,11 +154,26 @@ else()
set(LIB_SRC ${LIB_SRC} src/gourdon/AC.cpp)
endif()

# Check if compiler supports CPU multiarch ###########################

if(WITH_MULTIARCH)
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_x86_popcnt.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512_vpopcnt.cmake")

if(multiarch_x86_popcnt OR multiarch_avx512_vpopcnt)
set(LIB_SRC ${LIB_SRC} src/x86/cpuid.cpp)
endif()

if(NOT multiarch_avx512_vpopcnt)
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_arm_sve.cmake")
endif()
endif()

# Enable __float128 support (requires libquadmath) ###################

if(WITH_FLOAT128)
set(LIB_QUADMATH "quadmath")
set(HAVE_FLOAT128 "HAVE_FLOAT128")
list(APPEND PRIMECOUNT_LINK_LIBRARIES "quadmath")
list(APPEND PRIMECOUNT_COMPILE_DEFINITIONS "HAVE_FLOAT128")
endif()

# Use 32-bit integer division ########################################
Expand All @@ -168,7 +183,7 @@ endif()
# division. On most CPUs before 2020 this significantly
# improves performance.
if(WITH_DIV32)
set(ENABLE_DIV32 "ENABLE_DIV32")
list(APPEND PRIMECOUNT_COMPILE_DEFINITIONS "ENABLE_DIV32")
endif()

# Use -Wno-uninitialized with GCC compiler ###########################
Expand All @@ -188,15 +203,6 @@ include("${PROJECT_SOURCE_DIR}/cmake/compiler_supports_cpp11.cmake")

include("${PROJECT_SOURCE_DIR}/cmake/int128_t.cmake")

# Check if compiler supports x64 multiarch ###########################

if(WITH_MULTIARCH)
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512_vpopcnt.cmake")
if(NOT multiarch_avx512_vpopcnt)
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_arm_sve.cmake")
endif()
endif()

# Check for OpenMP ###################################################

if(WITH_OPENMP)
Expand Down Expand Up @@ -246,8 +252,8 @@ if(BUILD_SHARED_LIBS)
set_target_properties(libprimecount PROPERTIES SOVERSION ${PRIMECOUNT_VERSION_MAJOR})
set_target_properties(libprimecount PROPERTIES VERSION ${PRIMECOUNT_VERSION})
target_compile_options(libprimecount PRIVATE "${WNO_UNINITIALIZED}")
target_compile_definitions(libprimecount PRIVATE "${HAVE_FLOAT128}" "${DISABLE_INT128}" "${ENABLE_DIV32}" "${ENABLE_ASSERT}" "${ENABLE_MULTIARCH}" "${ENABLE_INT128_OPENMP_PATCH}")
target_link_libraries(libprimecount PRIVATE primesieve::primesieve "${LIB_OPENMP}" "${LIB_QUADMATH}" "${LIB_ATOMIC}")
target_compile_definitions(libprimecount PRIVATE ${PRIMECOUNT_COMPILE_DEFINITIONS})
target_link_libraries(libprimecount PRIVATE primesieve::primesieve ${PRIMECOUNT_LINK_LIBRARIES})

target_compile_features(libprimecount
PRIVATE
Expand All @@ -271,8 +277,8 @@ if(BUILD_STATIC_LIBS)
add_library(libprimecount-static STATIC ${LIB_SRC})
set_target_properties(libprimecount-static PROPERTIES OUTPUT_NAME primecount)
target_compile_options(libprimecount-static PRIVATE "${WNO_UNINITIALIZED}")
target_compile_definitions(libprimecount-static PRIVATE "${HAVE_FLOAT128}" "${DISABLE_INT128}" "${ENABLE_DIV32}" "${ENABLE_ASSERT}" "${ENABLE_MULTIARCH}" "${ENABLE_INT128_OPENMP_PATCH}")
target_link_libraries(libprimecount-static PRIVATE primesieve::primesieve "${LIB_OPENMP}" "${LIB_QUADMATH}" "${LIB_ATOMIC}")
target_compile_definitions(libprimecount-static PRIVATE ${PRIMECOUNT_COMPILE_DEFINITIONS})
target_link_libraries(libprimecount-static PRIVATE primesieve::primesieve ${PRIMECOUNT_LINK_LIBRARIES})

if(WITH_MSVC_CRT_STATIC)
set_target_properties(libprimecount-static PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded")
Expand Down Expand Up @@ -307,7 +313,7 @@ endif()
if(BUILD_PRIMECOUNT)
add_executable(primecount ${BIN_SRC})
target_link_libraries(primecount PRIVATE primecount::primecount primesieve::primesieve)
target_compile_definitions(primecount PRIVATE "${DISABLE_INT128}" "${ENABLE_DIV32}" "${ENABLE_ASSERT}" "${ENABLE_INT128_OPENMP_PATCH}")
target_compile_definitions(primecount PRIVATE ${PRIMECOUNT_COMPILE_DEFINITIONS})
target_compile_features(primecount PRIVATE cxx_auto_type)
install(TARGETS primecount DESTINATION ${CMAKE_INSTALL_BINDIR})

Expand Down
7 changes: 5 additions & 2 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
Changes in primecount-7.14, 2024-06-20
Changes in primecount-7.14, 2024-06-22

* Move x86 cpuid code from cpuid.hpp to src/x86/cpuid.cpp.
* int128_t.hpp: Rename namespace port to pstd (portable std namespace).
* popcnt.hpp: Improve GCC performance on x86 CPUs.
* Sieve.hpp: Tune AVX512 code.
* cpu_supports_popcnt.hpp: Simplify, move preprocessor checks to new multiarch_x86_popcnt.cmake.
* multiarch_avx512_vpopcnt.cmake: Tune AVX512 code.
* multiarch_x86_popcnt.cmake: Detect x86 POPCNT.
* CMakeLists.txt: Use CMake list for all compile time definitions.

Changes in primecount-7.13, 2024-04-15

Expand Down
10 changes: 6 additions & 4 deletions cmake/OpenMP.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,12 @@ if(OpenMP_FOUND OR OpenMP_CXX_FOUND)
return 0;
}" OpenMP_with_libatomic)

if(NOT OpenMP_with_libatomic)
if(OpenMP_with_libatomic)
list(APPEND PRIMECOUNT_LINK_LIBRARIES "${LIB_ATOMIC}")
else()
set(LIB_ATOMIC "")

if (NOT DISABLE_INT128)
if(NOT DISABLE_INT128)
# As a last resort check if OpenMP supports int128_t if
# we include our <int128_OpenMP_patch.hpp> header.
# In this case OpenMP will use critical sections instead
Expand All @@ -118,7 +120,7 @@ if(OpenMP_FOUND OR OpenMP_CXX_FOUND)
}" OpenMP_int128_patch)

if(OpenMP_int128_patch)
set(ENABLE_INT128_OPENMP_PATCH "ENABLE_INT128_OPENMP_PATCH")
list(APPEND PRIMECOUNT_COMPILE_DEFINITIONS "ENABLE_INT128_OPENMP_PATCH")
endif()
endif()
endif()
Expand All @@ -129,7 +131,7 @@ if(OpenMP_FOUND OR OpenMP_CXX_FOUND)
# OpenMP has been tested successfully, enable it
if(OpenMP OR OpenMP_with_libatomic OR OpenMP_int128_patch)
if(TARGET OpenMP::OpenMP_CXX)
set(LIB_OPENMP "OpenMP::OpenMP_CXX")
list(APPEND PRIMECOUNT_LINK_LIBRARIES "OpenMP::OpenMP_CXX")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()
Expand Down
2 changes: 1 addition & 1 deletion cmake/int128_t.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ check_cxx_source_compiles("
}" int128)

if(NOT int128)
set(DISABLE_INT128 "DISABLE_INT128")
list(APPEND PRIMECOUNT_COMPILE_DEFINITIONS "DISABLE_INT128")
endif()

cmake_pop_check_state()
2 changes: 1 addition & 1 deletion cmake/multiarch_arm_sve.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ check_cxx_source_compiles("
" multiarch_arm_sve)

if(multiarch_arm_sve)
set(ENABLE_MULTIARCH "ENABLE_MULTIARCH_ARM_SVE")
list(APPEND PRIMECOUNT_COMPILE_DEFINITIONS "ENABLE_MULTIARCH_ARM_SVE")
endif()

cmake_pop_check_state()
8 changes: 4 additions & 4 deletions cmake/multiarch_avx512_vpopcnt.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ include(CheckCXXSourceCompiles)
include(CMakePushCheckState)

cmake_push_check_state()
set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}/include")
set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}")

check_cxx_source_compiles("
// GCC/Clang function multiversioning for AVX512 is not needed if
Expand All @@ -20,7 +20,7 @@ check_cxx_source_compiles("
Error: AVX512 BMI2 multiarch not needed!
#endif
#include <cpu_supports_avx512_bmi2.hpp>
#include <src/x86/cpuid.cpp>
#include <immintrin.h>
#include <stdint.h>
Expand Down Expand Up @@ -65,7 +65,7 @@ check_cxx_source_compiles("
uint64_t cnt = 0;
Sieve sieve;
if (cpu_supports_avx512_bmi2)
if (primecount::has_cpuid_avx512_bmi2())
cnt = sieve.count_avx512_bmi2(&array[0], 10);
else
cnt = sieve.count_default(&array[0], 10);
Expand All @@ -75,7 +75,7 @@ check_cxx_source_compiles("
" multiarch_avx512_vpopcnt)

if(multiarch_avx512_vpopcnt)
set(ENABLE_MULTIARCH "ENABLE_MULTIARCH_AVX512_BMI2")
list(APPEND PRIMECOUNT_COMPILE_DEFINITIONS "ENABLE_MULTIARCH_AVX512_BMI2")
endif()

cmake_pop_check_state()
53 changes: 53 additions & 0 deletions cmake/multiarch_x86_popcnt.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# On x86 CPUs we need to enable the use of cpuid.cpp.
# If cpuid.cpp compiles we assume it is a x86 CPU.

include(CheckCXXSourceCompiles)
include(CMakePushCheckState)

cmake_push_check_state()
set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}")

check_cxx_source_compiles("
// Enable CPUID for POPCNT on x86 and x86-64 CPUs.
// This is required because not all x86 and x86-64 CPUs
// support the POPCNT instruction.
#if !(defined(__x86_64__) || \
defined(__i386__) || \
defined(_M_X64) || \
defined(_M_IX86))
Error: x86 POPCNT multiarch not needed!
#endif
// Both GCC and Clang (even Clang on Windows) define the __POPCNT__
// macro if the user compiles with -mpopcnt. The __POPCNT__
// macro is even defined if the user compiles with other flags
// such as -mavx or -march=native.
#if defined(__POPCNT__)
Error: x86 POPCNT multiarch not needed!
// The MSVC compiler does not support a POPCNT macro, but if the user
// compiles with e.g. /arch:AVX or /arch:AVX512 then MSVC defines
// the __AVX__ macro and POPCNT is also supported.
#elif defined(_MSC_VER) && defined(__AVX__)
Error: x86 POPCNT multiarch not needed!
#endif
#include <src/x86/cpuid.cpp>
#include <iostream>
int main()
{
if (primecount::has_cpuid_popcnt())
std::cout << \"CPU supports POPCNT!\" << std::endl;
else
std::cout << \"CPU does not support POPCNT!\" << std::endl;
return 0;
}
" multiarch_x86_popcnt)

if(multiarch_x86_popcnt)
list(APPEND PRIMECOUNT_COMPILE_DEFINITIONS "ENABLE_MULTIARCH_x86_POPCNT")
endif()

cmake_pop_check_state()
72 changes: 4 additions & 68 deletions include/cpu_supports_avx512_bmi2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,80 +11,16 @@
#ifndef CPU_SUPPORTS_AVX512_BMI2_HPP
#define CPU_SUPPORTS_AVX512_BMI2_HPP

#include <cpuid.hpp>
#include <stdint.h>
namespace primecount {

#if defined(_MSC_VER)
#include <immintrin.h>
#endif

// CPUID bits documentation:
// https://en.wikipedia.org/wiki/CPUID

// %ebx bit flags
#define bit_BMI2 (1 << 8)
#define bit_AVX512F (1 << 16)
bool has_cpuid_avx512_bmi2();

// %ecx bit flags
#define bit_AVX512_VPOPCNTDQ (1 << 14)

// xgetbv bit flags
#define XSTATE_SSE (1 << 1)
#define XSTATE_YMM (1 << 2)
#define XSTATE_ZMM (7 << 5)
} // namespace

namespace {

// Get Value of Extended Control Register
inline uint64_t get_xcr0()
{
#if defined(_MSC_VER)
return _xgetbv(0);
#else
uint32_t eax;
uint32_t edx;

__asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
return eax | (uint64_t(edx) << 32);
#endif
}

inline bool run_cpuid_avx512_bmi2()
{
int abcd[4];

run_cpuid(1, 0, abcd);

int osxsave_mask = (1 << 27);

// Ensure OS supports extended processor state management
if ((abcd[2] & osxsave_mask) != osxsave_mask)
return false;

uint64_t ymm_mask = XSTATE_SSE | XSTATE_YMM;
uint64_t zmm_mask = XSTATE_SSE | XSTATE_YMM | XSTATE_ZMM;
uint64_t xcr0 = get_xcr0();

// Check AVX OS support
if ((xcr0 & ymm_mask) != ymm_mask)
return false;

// Check AVX512 OS support
if ((xcr0 & zmm_mask) != zmm_mask)
return false;

run_cpuid(7, 0, abcd);

if ((abcd[1] & bit_BMI2) != bit_BMI2)
return false;

// AVX512F, AVX512VPOPCNTDQ
return ((abcd[1] & bit_AVX512F) == bit_AVX512F &&
(abcd[2] & bit_AVX512_VPOPCNTDQ) == bit_AVX512_VPOPCNTDQ);
}

/// Initialized at startup
bool cpu_supports_avx512_bmi2 = run_cpuid_avx512_bmi2();
bool cpu_supports_avx512_bmi2 = primecount::has_cpuid_avx512_bmi2();

} // namespace

Expand Down
40 changes: 4 additions & 36 deletions include/cpu_supports_popcnt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,49 +11,17 @@
#ifndef CPU_SUPPORTS_POPCNT_HPP
#define CPU_SUPPORTS_POPCNT_HPP

// Enable CPUID on x86 and x86-64 CPUs
#if defined(__x86_64__) || \
defined(__i386__) || \
defined(_M_X64) || \
defined(_M_IX86)

// Both GCC and Clang (even Clang on Windows) define the __POPCNT__
// macro if the user compiles with -mpopcnt. The __POPCNT__
// macro is even defined if the user compiles with other flags
// such as -mavx or -march=native.
#if defined(__POPCNT__)
#define HAS_POPCNT
// The MSVC compiler does not support a POPCNT macro, but if the user
// compiles with e.g. /arch:AVX or /arch:AVX512 then MSVC defines
// the __AVX__ macro and POPCNT is also supported.
#elif defined(_MSC_VER) && defined(__AVX__)
#define HAS_POPCNT
#endif
namespace primecount {

#if !defined(HAS_POPCNT)
bool has_cpuid_popcnt();

#include <cpuid.hpp>
#define ENABLE_CPUID_POPCNT
} // namespace

namespace {

inline bool run_cpuid_supports_popcnt()
{
int abcd[4];
run_cpuid(1, 0, abcd);

// %ecx POPCNT bit flag
// https://en.wikipedia.org/wiki/CPUID
int bit_POPCNT = 1 << 23;
return (abcd[2] & bit_POPCNT) == bit_POPCNT;
}

/// Initialized at startup
bool cpu_supports_popcnt = run_cpuid_supports_popcnt();
bool cpu_supports_popcnt = primecount::has_cpuid_popcnt();

} // namespace

#endif // !defined(HAS_POPCNT)
#endif // CPUID

#endif
Loading

0 comments on commit ae31111

Please sign in to comment.