Skip to content

Commit

Permalink
3.0-rc1 (#105)
Browse files Browse the repository at this point in the history
* Do some backup

* Do some backup

* Do some backup

* Many fixes for C11 API

* Add support for C11 to tests

* Rewrite pack.md

* Finish documentation on packs

* Add += *= /= ... to C++ advanced API

* Forgot some files

* Add C++ advanced API aliases

* Git more files

* Begin adding Sleef big math functions to NSIMD

* Add doc for +=, -=, ... and for function aliases

* Backup

* Backup before power cut

* src/ulps.cpp

* More on ULPs

* More on ULPs

* Fixes in ULPs doc

* finished with denormal in doc

* Add UFP related functions but no test for now

* Add support for scalar math functions

* Add test for UFP

* Integration of sine (ulp 3.5) from Sleef seems to work

* Backup

* Compilation of all tests seems fine

* Remove fixed_point module own operator list and make it use the central one

* Tests passed for CPU

* It seems to work for Intel

* New tests system

* Add Sleef code

* Add emulation layer for SIMD not supporting double

* Fix support of SIMD emulation for doubles

* Fix cmake build system

* Improve andnot doc

* Fix generation of Sleef files

* Replace SVE _z intrinsics by _x ones

* Fix SVE implementation

* For backup: beginning of merging PPC from xberault

* For backup: continuing merging PPC from xberault

* More on VMX and VSX

* Fix what_is_wrapped

* Doc seems to be OK

* VMX compiles but with a lot of warnings

* Refurbishing implementation of platform ppc

* Refurbishing implementation of platform ppc

* Refurbishing implementation of platform ppc

* Refurbishing implementation of platform ppc

* Refurbishing implementation of platform ppc

* Fixes for PPC

* No compilation with Xlc

* Fix warnings when compiling C++/PPC code + update CONTRIBUTING with PPC

* Update documentation

* Many fixes to ppc

* Fixes for VSX

* More fixes for PPC

* Fix PPC

* Test

* Fix README

* Beginning integrating oneAPI

* Backup oneAPI implementation

* Freeze for WASM

* Fix oneAPI SPMD

* All tests are ok for oneAPI

* Fix various things

* Fixes for CUDA

* Fix for ROCm

* Add contributors

* Update README

* Backup from time to time

* Gather all before rc1

Co-authored-by: gquintin <[email protected]>
  • Loading branch information
gquintin and gquintin authored Oct 14, 2021
1 parent a50ce2f commit 9d4b825
Show file tree
Hide file tree
Showing 101 changed files with 35,061 additions and 5,415 deletions.
160 changes: 142 additions & 18 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# SOFTWARE.

cmake_minimum_required(VERSION 3.0.2)
project(NSIMD VERSION 2.2 LANGUAGES CXX)
project(NSIMD VERSION 3.0 LANGUAGES C CXX)

# -----------------------------------------------------------------------------
# First check that NSIMD code has been generated
Expand Down Expand Up @@ -62,8 +62,11 @@ function(nsimd_get_compiler_argument simd_ext argument)
set(mapping_sve512 "/DSVE512")
set(mapping_sve1024 "/DSVE1024")
set(mapping_sve2048 "/DSVE2048")
set(mapping_vmx "/DVMX")
set(mapping_vsx "/DVSX")
set(mapping_cuda "/DCUDA")
set(mapping_rocm "/DROCM")
# MSVC-style flag defining the ONEAPI preprocessor macro (/D<name>), in line
# with the other /D... mappings of this branch.
set(mapping_oneapi "/DONEAPI")
else()
set(mapping_sse2 "-DSSE2;-msse2" )
set(mapping_sse42 "-DSSE42;-msse4.2" )
Expand All @@ -89,8 +92,11 @@ function(nsimd_get_compiler_argument simd_ext argument)
";-msve-vector-bits=1024")
# Compiler arguments for SVE with 2048-bit vectors. Each flag must be its own
# list element (';'-separated): a space-separated pair would reach the compiler
# as one single argument.
set(mapping_sve2048 "-DSVE2048;-march=armv8.2-a+sve"
                    "-msve-vector-bits=2048")
set(mapping_vmx "-DVMX;-mcpu=powerpc64le;-maltivec")
set(mapping_vsx "-DVSX;-mcpu=powerpc64le;-mvsx")
set(mapping_cuda "-DCUDA")
set(mapping_rocm "-DROCM")
set(mapping_oneapi "-DONEAPI")
endif()
if (DEFINED mapping_${simd_ext})
set(${argument} "${mapping_${simd_ext}}" PARENT_SCOPE)
Expand All @@ -111,51 +117,109 @@ nsimd_get_compiler_argument(${simd} NSIMD_COMPILATION_OPTIONS)
# -----------------------------------------------------------------------------
# Object file selection

set(NSIMD_OBJS "fp16;memory;ulps;api_cpu")
set(NSIMD_OBJS "fp16;gpu;memory;api_cpu;rempitab;sleefsp;sleefdp")

if ("${simd}" STREQUAL "sse2")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;sleef_sse2_f32;sleef_sse2_f64")
elseif ("${simd}" STREQUAL "sse42")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;"
"sleef_sse2_f32;sleef_sse2_f64;"
"sleef_sse42_f32;sleef_sse42_f64")
elseif ("${simd}" STREQUAL "avx")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;"
"sleef_sse2_f32;sleef_sse2_f64;"
"sleef_sse42_f32;sleef_sse42_f64;"
"sleef_avx_f32;sleef_avx_f64")
elseif ("${simd}" STREQUAL "avx2")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2;"
"sleef_sse2_f32;sleef_sse2_f64;"
"sleef_sse42_f32;sleef_sse42_f64;"
"sleef_avx_f32;sleef_avx_f64;"
"sleef_avx2_f32;sleef_avx2_f64")
elseif ("${simd}" STREQUAL "avx512_knl")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2"
";api_avx512_knl")
"sleef_sse2_f32;sleef_sse2_f64;"
"sleef_sse42_f32;sleef_sse42_f64;"
"sleef_avx_f32;sleef_avx_f64;"
"sleef_avx2_f32;sleef_avx2_f64;"
"api_avx512_knl;sleef_avx512_knl_f32;sleef_avx512_knl_f64")
elseif ("${simd}" STREQUAL "avx512_skylake")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2"
";api_avx512_skylake")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2;"
"api_avx512_skylake;sleef_avx512_skylake_f32;"
"sleef_sse2_f32;sleef_sse2_f64;"
"sleef_sse42_f32;sleef_sse42_f64;"
"sleef_avx_f32;sleef_avx_f64;"
"sleef_avx2_f32;sleef_avx2_f64;"
"sleef_avx512_skylake_f64")
elseif ("${simd}" STREQUAL "neon128")
set(NSIMD_OBJS "${NSIMD_OBJS};api_neon128")
set(NSIMD_OBJS "${NSIMD_OBJS};api_neon128;"
"sleef_neon128_f32;sleef_neon128_f64")
elseif ("${simd}" STREQUAL "aarch64")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;"
"sleef_aarch64_f32;sleef_aarch64_f64")
elseif ("${simd}" STREQUAL "sve")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve;"
"sleef_aarch64_f32;sleef_aarch64_f64;"
"sleef_sve_f32;sleef_sve_f64")
elseif ("${simd}" STREQUAL "sve128")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve128")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve128;"
"sleef_aarch64_f32;sleef_aarch64_f64;"
"sleef_sve128_f32;sleef_sve128_f64")
elseif ("${simd}" STREQUAL "sve256")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve256")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve256;"
"sleef_aarch64_f32;sleef_aarch64_f64;"
"sleef_sve256_f32;sleef_sve256_f64")
elseif ("${simd}" STREQUAL "sve512")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve512")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve512;"
"sleef_aarch64_f32;sleef_aarch64_f64;"
"sleef_sve512_f32;sleef_sve512_f64")
elseif ("${simd}" STREQUAL "sve1024")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve1024")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve1024;"
"sleef_aarch64_f32;sleef_aarch64_f64;"
"sleef_sve1024_f32;sleef_sve1024_f64")
elseif ("${simd}" STREQUAL "sve2048")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve2048")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve2048;"
"sleef_aarch64_f32;sleef_aarch64_f64;"
"sleef_sve2048_f32;sleef_sve2048_f64")
elseif ("${simd}" STREQUAL "vmx")
set(NSIMD_OBJS "${NSIMD_OBJS};api_vmx;sleef_vmx_f32;sleef_vmx_f64")
elseif ("${simd}" STREQUAL "vsx")
# The vsx build compiles the vmx objects too, plus the vsx ones. Every object
# name must be unique because each one becomes an add_library() target.
set(NSIMD_OBJS "${NSIMD_OBJS};api_vmx;api_vsx"
               "sleef_vmx_f32;sleef_vmx_f64"
               "sleef_vsx_f32;sleef_vsx_f64")
endif()

# -----------------------------------------------------------------------------
# Rules for building the library

set(NSIMD_LIB_DEPS "")
foreach(o ${NSIMD_OBJS})
add_library(${o} OBJECT src/${o}.cpp)
# Select the source file implementing object library `${o}`:
#   1. src/${o}.cpp, then src/${o}.c, when such a file exists;
#   2. otherwise one of the shared Sleef SIMD sources:
#      - the emulation sources for sleef_neon128_f64, sleef_vmx_f64 (double
#        precision) and sleef_vmx_f32 (single precision);
#      - sleefsimdsp.c for any other sleef_*_f32 object and sleefsimddp.c for
#        any other sleef_*_f64 object.
# When nothing matches, no target would be created and the commands that
# follow would fail on a missing target, so stop with an explicit message.
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.cpp")
add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.cpp")
elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.c")
add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.c")
elseif(("${o}" STREQUAL "sleef_neon128_f64") OR
("${o}" STREQUAL "sleef_vmx_f64"))
add_library(${o} OBJECT
"${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimddp_emulation.c")
elseif("${o}" STREQUAL "sleef_vmx_f32")
add_library(${o} OBJECT
"${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimdsp_emulation.c")
elseif(o MATCHES "sleef_.*_f32")
add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimdsp.c")
elseif(o MATCHES "sleef_.*_f64")
add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimddp.c")
else()
message(FATAL_ERROR
"NSIMD: no source file found for object '${o}' in "
"${CMAKE_CURRENT_SOURCE_DIR}/src")
endif()
if (MSVC)
set(sleef_cflags "/DNDEBUG;/DDORENAME=1")
else()
set(sleef_cflags "-DNDEBUG;-DDORENAME=1")
endif()
set_property(TARGET ${o} PROPERTY POSITION_INDEPENDENT_CODE ON)
target_include_directories(${o} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
# Define _CRT_SECURE_NO_WARNINGS for MSVC builds. target_compile_definitions()
# expects the bare macro name: it only strips a leading "-D", so a "/D" prefix
# would become part of the definition.
if (MSVC)
target_compile_definitions(${o} PUBLIC "_CRT_SECURE_NO_WARNINGS")
endif()
set(buf "")
if ("${o}" STREQUAL "api_sse2")
nsimd_get_compiler_argument("sse2" buf)
elseif ("${o}" STREQUAL "api_sse42")
Expand Down Expand Up @@ -184,15 +248,75 @@ foreach(o ${NSIMD_OBJS})
nsimd_get_compiler_argument("sve1024" buf)
elseif ("${o}" STREQUAL "api_sve2048")
nsimd_get_compiler_argument("sve2048" buf)
elseif ("${o}" STREQUAL "api_vmx")
nsimd_get_compiler_argument("vmx" buf)
elseif ("${o}" STREQUAL "api_vsx")
nsimd_get_compiler_argument("vsx" buf)
elseif ("${o}" STREQUAL "api_cuda")
nsimd_get_compiler_argument("cuda" buf)
elseif ("${o}" STREQUAL "api_rocm")
nsimd_get_compiler_argument("rocm" buf)
elseif ("${o}" STREQUAL "api_cpu")
nsimd_get_compiler_argument("cpu" buf)
elseif ("${o}" STREQUAL "rempitab")
list(APPEND buf "${sleef_cflags}")
elseif ("${o}" STREQUAL "sleefsp")
list(APPEND buf "${sleef_cflags}")
elseif ("${o}" STREQUAL "sleefdp")
list(APPEND buf "${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sse2_")
nsimd_get_compiler_argument("sse2" buf)
list(APPEND buf "-DNSIMD_SSE2;-DENABLE_SSE2=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sse42_")
nsimd_get_compiler_argument("sse42" buf)
list(APPEND buf "-DNSIMD_SSE42;-DENABLE_SSE4=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_avx_")
nsimd_get_compiler_argument("avx" buf)
list(APPEND buf "-DNSIMD_AVX;-DENABLE_AVX=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_avx2_")
nsimd_get_compiler_argument("avx2" buf)
list(APPEND buf "-DNSIMD_AVX2;-DENABLE_AVX2=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_avx512_knl_")
nsimd_get_compiler_argument("avx512_knl" buf)
list(APPEND buf "-DNSIMD_AVX512_KNL;-DENABLE_AVX512F=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_avx512_skylake_")
nsimd_get_compiler_argument("avx512_skylake" buf)
list(APPEND buf
"-DNSIMD_AVX512_SKYLAKE;-DENABLE_AVX512F=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_neon128_")
nsimd_get_compiler_argument("neon128" buf)
list(APPEND buf "-DNSIMD_NEON128;-DENABLE_NEON32=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_aarch64_")
nsimd_get_compiler_argument("aarch64" buf)
list(APPEND buf "-DNSIMD_AARCH64;-DENABLE_ADVSIMD=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sve_")
nsimd_get_compiler_argument("sve" buf)
list(APPEND buf "-DNSIMD_SVE;-DENABLE_SVE=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sve128_")
nsimd_get_compiler_argument("sve128" buf)
list(APPEND buf "-DNSIMD_SVE128;-DENABLE_SVE=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sve256_")
nsimd_get_compiler_argument("sve256" buf)
list(APPEND buf "-DNSIMD_SVE256;-DENABLE_SVE=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sve512_")
nsimd_get_compiler_argument("sve512" buf)
list(APPEND buf "-DNSIMD_SVE512;-DENABLE_SVE=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sve1024_")
nsimd_get_compiler_argument("sve1024" buf)
list(APPEND buf "-DNSIMD_SVE1024;-DENABLE_SVE=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sve2048_")
nsimd_get_compiler_argument("sve2048" buf)
list(APPEND buf "-DNSIMD_SVE2048;-DENABLE_SVE=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_vmx_")
nsimd_get_compiler_argument("vmx" buf)
list(APPEND buf "-DNSIMD_VMX;-DENABLE_VSX=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_vsx_")
nsimd_get_compiler_argument("vsx" buf)
list(APPEND buf "-DNSIMD_VSX;-DENABLE_VSX=1;${sleef_cflags}")
else()
set(buf "")
endif()
message(STATUS "DEBUG: ${o} --> ${buf}")
if (NOT "${buf}" STREQUAL "")
target_compile_options(${o} PUBLIC "${buf}")
endif()
Expand Down
Loading

0 comments on commit 9d4b825

Please sign in to comment.