diff --git a/.travis.yml b/.travis.yml index a08d2351c..aac635553 100644 --- a/.travis.yml +++ b/.travis.yml @@ -56,6 +56,16 @@ matrix: env: MATRIX_EVAL="CC=clang && CXX=clang++" addons: {apt: {packages: [*common_packages, ]}} + - name: Linux s390x GCC 7 + arch: s390x + env: MATRIX_EVAL="CC=gcc-7 && CXX=g++-7" + addons: {apt: {packages: [*common_packages, ]}} + + - name: Linux ppc64le GCC 7 + arch: ppc64le + env: MATRIX_EVAL="CC=gcc-7 && CXX=g++-7" + addons: {apt: {packages: [*common_packages, ]}} + script: - eval "${MATRIX_EVAL}" - lscpu diff --git a/compile.sh b/compile.sh new file mode 100755 index 000000000..0af7ef7c1 --- /dev/null +++ b/compile.sh @@ -0,0 +1,3 @@ +gcc -std=c17 -I/home/johannes/src/volk/include -I/home/johannes/src/volk/build/include -L/home/johannes/src/volk/build/lib -x c main.c -o mainvolkgnuc -lm -lvolk +clang -std=c17 -I/home/johannes/src/volk/include -I/home/johannes/src/volk/build/include -L/home/johannes/src/volk/build/lib -x c main.c -o mainvolkclangc -lm -lvolk +g++ -std=c++17 -I/home/johannes/src/volk/include -I/home/johannes/src/volk/build/include -L/home/johannes/src/volk/build/lib -x c++ main.cc -o mainvolkcpp -lm -lfmt -lvolk \ No newline at end of file diff --git a/include/volk/volk_common.h b/include/volk/volk_common.h index 70b94cbdd..d7bde1c5d 100644 --- a/include/volk/volk_common.h +++ b/include/volk/volk_common.h @@ -85,8 +85,9 @@ //////////////////////////////////////////////////////////////////////// // C-linkage declaration macros // FIXME: due to the usage of complex.h, require gcc for c-linkage +// Hope that extern "C" works for all relevant compilers nowadays. //////////////////////////////////////////////////////////////////////// -#if defined(__cplusplus) && (__GNUC__) +#if defined(__cplusplus) #define __VOLK_DECL_BEGIN extern "C" { #define __VOLK_DECL_END } #else diff --git a/include/volk/volk_complex.h b/include/volk/volk_complex.h index 4d0efc4ba..96d99405f 100644 --- a/include/volk/volk_complex.h +++ b/include/volk/volk_complex.h @@ -26,55 +26,61 @@ * - lv_conj - take the conjugate of the complex number */ -#ifdef __cplusplus - -#include -#include - -typedef std::complex lv_8sc_t; -typedef std::complex lv_16sc_t; -typedef std::complex lv_32sc_t; -typedef std::complex lv_64sc_t; -typedef std::complex lv_32fc_t; -typedef std::complex lv_64fc_t; - -template -inline std::complex lv_cmake(const T& r, const T& i) -{ - return std::complex(r, i); -} - -template -inline typename T::value_type lv_creal(const T& x) -{ - return x.real(); -} - -template -inline typename T::value_type lv_cimag(const T& x) -{ - return x.imag(); -} - -template -inline T lv_conj(const T& x) -{ - return std::conj(x); -} - -#else /* __cplusplus */ - #include -#include - -typedef char complex lv_8sc_t; -typedef short complex lv_16sc_t; -typedef long complex lv_32sc_t; -typedef long long complex lv_64sc_t; -typedef float complex lv_32fc_t; -typedef double complex lv_64fc_t; +#include + +__VOLK_DECL_BEGIN +#ifndef _MSC_VER +// Obviously, we would love `typedef float complex lv_32fc_t` to work. +// However, this clashes with C++ definitions. +// error: expected initializer before ‘lv_32fc_t’ +// --> typedef float complex lv_32fc_t; +// https://stackoverflow.com/a/10540302 + +typedef char _Complex lv_8sc_t; +typedef short _Complex lv_16sc_t; +typedef long _Complex lv_32sc_t; +typedef long long _Complex lv_64sc_t; +typedef float _Complex lv_32fc_t; +typedef double _Complex lv_64fc_t; + +#else +// MSVC requires different treatment. +// https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=msvc-160 +// https://docs.microsoft.com/en-us/cpp/c-runtime-library/complex-math-support?view=msvc-160 +// Refer to `complex.h` in +// https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk/ +// https://github.com/microsoft/STL/blob/main/stl/inc/complex + +typedef _Fcomplex lv_32fc_t; +typedef _Dcomplex lv_64fc_t; + +// typedef char _Complex lv_8sc_t; +typedef struct lv_8sc_t { + char _Val[2]; +} lv_8sc_t; + +// typedef short _Complex lv_16sc_t; +typedef struct lv_16sc_t { + short _Val[2]; +} lv_16sc_t; + +// typedef long _Complex lv_32sc_t; +typedef struct lv_32sc_t { + long _Val[2]; +} lv_32sc_t; + +// typedef long long _Complex lv_64sc_t; +typedef struct lv_64sc_t { + long long _Val[2]; +} lv_64sc_t; +#endif #define lv_cmake(r, i) ((r) + _Complex_I * (i)) +// We want `_Imaginary_I` to ensure the correct sign. +// https://en.cppreference.com/w/c/numeric/complex/Imaginary_I +// It does not compile. Complex numbers are a terribly implemented afterthought. +// #define lv_cmake(r, i) ((r) + _Imaginary_I * (i)) // When GNUC is available, use the complex extensions. // The extensions always return the correct value type. @@ -93,6 +99,7 @@ typedef double complex lv_64fc_t; // with type-generic versions. #else /* __GNUC__ */ + #define lv_creal(x) (creal(x)) #define lv_cimag(x) (cimag(x)) @@ -101,6 +108,6 @@ typedef double complex lv_64fc_t; #endif /* __GNUC__ */ -#endif /* __cplusplus */ +__VOLK_DECL_END #endif /* INCLUDE_VOLK_COMPLEX_H */ diff --git a/lib/kernel_tests.h b/lib/kernel_tests.h index dc3484127..dbac3084d 100644 --- a/lib/kernel_tests.h +++ b/lib/kernel_tests.h @@ -42,7 +42,9 @@ std::vector init_test_list(volk_test_params_t test_params) test_params_power.set_scalar(2.5); volk_test_params_t test_params_rotator(test_params); - test_params_rotator.set_scalar(std::polar(1.0f, 0.1f)); + auto rotator_value = std::polar(1.0f, 0.1f); + test_params_rotator.set_scalar( + lv_32fc_t{ rotator_value.real(), rotator_value.imag() }); test_params_rotator.set_tol(1e-3); std::vector test_cases; diff --git a/lib/qa_utils.cc b/lib/qa_utils.cc index 378d544d1..1f0a47162 100644 --- a/lib/qa_utils.cc +++ b/lib/qa_utils.cc @@ -636,7 +636,7 @@ bool run_volk_tests(volk_func_desc_t desc, } else { run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], - scalar.real(), + __real__ scalar, vlen, iter, arch_list[i]); @@ -659,7 +659,7 @@ bool run_volk_tests(volk_func_desc_t desc, } else { run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], - scalar.real(), + __real__ scalar, vlen, iter, arch_list[i]); @@ -682,7 +682,7 @@ bool run_volk_tests(volk_func_desc_t desc, } else { run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], - scalar.real(), + __real__ scalar, vlen, iter, arch_list[i]); diff --git a/lib/volk_rank_archs.h b/lib/volk_rank_archs.h index 0a6c2e117..e8ae1a3df 100644 --- a/lib/volk_rank_archs.h +++ b/lib/volk_rank_archs.h @@ -12,10 +12,9 @@ #include #include +#include -#ifdef __cplusplus -extern "C" { -#endif +__VOLK_DECL_BEGIN int volk_get_index(const char* impl_names[], // list of implementations by name const size_t n_impls, // number of implementations available @@ -30,7 +29,6 @@ int volk_rank_archs(const char* kern_name, // name of the kernel to rank const bool align // if false, filter aligned implementations ); -#ifdef __cplusplus -} -#endif +__VOLK_DECL_END + #endif /*INCLUDED_VOLK_RANK_ARCHS_H*/ diff --git a/main.c b/main.c new file mode 100644 index 000000000..de3e69a5b --- /dev/null +++ b/main.c @@ -0,0 +1,98 @@ + +#include +#include +#include + +void function_test(int num_points) +{ + unsigned int alignment = volk_get_alignment(); + lv_32fc_t* in0 = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t) * num_points, alignment); + lv_32fc_t* in1 = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t) * num_points, alignment); + lv_32fc_t* out = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t) * num_points, alignment); + + for (unsigned int ii = 0; ii < num_points; ++ii) { + // Generate two tones + float real_1 = cosf(0.3f * (float)ii); + float imag_1 = sinf(0.3f * (float)ii); + in0[ii] = lv_cmake(real_1, imag_1); + float real_2 = cosf(0.1f * (float)ii); + float imag_2 = sinf(0.1f * (float)ii); + in1[ii] = lv_cmake(real_2, imag_2); + } + + volk_32fc_x2_multiply_32fc(out, in0, in1, num_points); + + for (unsigned int ii = 0; ii < num_points; ++ii) { + lv_32fc_t v0 = in0[ii]; + lv_32fc_t v1 = in1[ii]; + lv_32fc_t o = out[ii]; + printf("in0=(%+.1f%+.1fj), in1=(%+.1f%+.1fj), out=(%+.1f%+.1fj)\n", + creal(v0), + cimag(v0), + creal(v1), + cimag(v1), + creal(o), + cimag(o)); + } + + volk_free(in0); + volk_free(in1); + volk_free(out); +} + +int main(int argc, char* argv[]) +{ + function_test(32); + + lv_32fc_t fc_cpl[4]; + printf("float=%lu, complex float=%lu, complex float array[4]=%lu\n", + sizeof(float), + sizeof(lv_32fc_t), + sizeof(fc_cpl)); + + for (int i = 0; i < 4; i++) { + fc_cpl[i] = (i + 3) + I * (i + 8); + + fc_cpl[i] = lv_cmake(i + 3, i + 8); + } + for (int i = 0; i < 4; i++) { + lv_32fc_t val = fc_cpl[i]; + lv_32fc_t cval = conj(val); + lv_32fc_t gval = ~val; + lv_32fc_t mult = val * val; + printf("val = %+.1f%+.1fj\n", creal(val), cimag(val)); + printf("conj(val)= %+.1f%+.1fj\n", creal(cval), cimag(cval)); + printf("gcc: ~val= %+.1f%+.1fj\n", creal(gval), cimag(gval)); + printf("val*val = %+.1f%+.1fj\n", creal(mult), cimag(mult)); + } + + lv_8sc_t sc_cpl[4]; + printf("\n\nchar=%lu, complex char=%lu, complex char array[4]=%lu\n", + sizeof(char), + sizeof(lv_8sc_t), + sizeof(sc_cpl)); + + for (int i = 0; i < 4; i++) { + // lv_8sc_t value = (i + 3) + I * (i + 8); + // printf("value=%+hhi%+hhij\n", creal(value), cimag(value)); + // sc_cpl[i] = (i + 3) + I * (i + 8); + sc_cpl[i] = lv_cmake(i + 3, i + 8); + // printf("%i + j %i\n", creal(sc_cpl[i]), cimag(sc_cpl[i])); + } + for (int i = 0; i < 4; i++) { + lv_8sc_t val = sc_cpl[i]; + lv_8sc_t cval = conj(val); + // lv_8sc_t cval = lv_cmake(creal(val), -cimag(val)); + lv_8sc_t gval = ~val; + lv_8sc_t mult = val * val; + printf("val = %+hhi%+hhij\n", __real__ val, __imag__ val); + printf("conj(val)= %+hhi%+hhij\n", __real__ cval, __imag__ cval); + printf("gcc: ~val= %+hhi%+hhij\n", __real__ gval, __imag__ gval); + printf("val*val = %+hhi%+hhij\n", __real__ mult, __imag__ mult); + } + + // char* values = (char*) sc_cpl; + // for (int i = 0; i < 8; i++) { + // printf("%hhi\n", values[i]); + // } +} diff --git a/main.cc b/main.cc new file mode 100644 index 000000000..59898093d --- /dev/null +++ b/main.cc @@ -0,0 +1,200 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * These type definitions are in line with our C definitions. + * + * Alternativele, we could go with the NumPy scheme: + * np.complex64 aka std::complex + * np.complex128 aka std::complex + * The underlying types are probably defined like Ctypes. + * This is about the idea. + */ +typedef std::complex ic8; +typedef std::complex ic16; +typedef std::complex ic32; +typedef std::complex ic64; +typedef std::complex fc32; +typedef std::complex fc64; + +#include +#include + +/* C++ Interface requirements + * + * 1. Make C++ STL types usable `std::vector`, `std::complex`. + * 2. Make aligned vectors aka `volk::vector` usable. + * 3. Allow call-by-pointer for GR buffer interface usage etc. + * + * These requirements result in at least 3 functions. + * We might want to think about fancy new C++ features e.g. concepts to consolidate these. + */ + +namespace volk { + +/* + * Start of wrapper for volk_32fc_s32fc_multiply_32fc + */ +void cppscalarmultiply_pointers(fc32* result, + const fc32* input0, + const fc32 scalar, + const unsigned int num_points) +{ + volk_32fc_s32fc_multiply_32fc(reinterpret_cast(result), + reinterpret_cast(input0), + lv_32fc_t{ scalar.real(), scalar.imag() }, + num_points); +} + +void cppscalarmultiply_stl_vector(std::vector& result, + const std::vector& input0, + const fc32 scalar) +{ + unsigned int num_points = std::min({ result.size(), input0.size() }); + cppscalarmultiply_pointers(result.data(), input0.data(), scalar, num_points); +} + +void cppscalarmultiply_aligned_vector(volk::vector& result, + const volk::vector& input0, + const fc32 scalar) +{ + unsigned int num_points = std::min({ result.size(), input0.size() }); + cppscalarmultiply_pointers(result.data(), input0.data(), scalar, num_points); +} + +/* + * Start of wrapper for volk_32fc_x2_multiply_32fc + */ +void cppmultiply_pointers(fc32* result, + const fc32* input0, + const fc32* input1, + const unsigned int num_points) +{ + volk_32fc_x2_multiply_32fc(reinterpret_cast(result), + reinterpret_cast(input0), + reinterpret_cast(input1), + num_points); +} + +void cppmultiply_stl_vector(std::vector& result, + const std::vector& input0, + const std::vector& input1) +{ + unsigned int num_points = std::min({ result.size(), input0.size(), input1.size() }); + cppmultiply_pointers(result.data(), input0.data(), input1.data(), num_points); +} + +void cppmultiply_aligned_vector(volk::vector& result, + const volk::vector& input0, + const volk::vector& input1) +{ + unsigned int num_points = std::min({ result.size(), input0.size(), input1.size() }); + cppmultiply_pointers(result.data(), input0.data(), input1.data(), num_points); +} + +} // namespace volk + + +std::vector fill_vector(int num_points, float step_value) +{ + std::vector vec(num_points); + + for (unsigned int ii = 0; ii < num_points; ++ii) { + float real_1 = std::cos(step_value * (float)ii); + float imag_1 = std::sin(step_value * (float)ii); + vec[ii] = fc32(real_1, imag_1); + } + return vec; +} + +void function_test_vectors(int num_points) +{ + std::vector uin0(fill_vector(num_points, 0.3f)); + volk::vector in0(uin0.begin(), uin0.end()); + std::vector uin1(fill_vector(num_points, 0.1f)); + volk::vector in1(uin1.begin(), uin1.end()); + std::vector uout(num_points); + volk::vector out(num_points); + + volk::cppmultiply_aligned_vector(out, in0, in1); + + volk::cppmultiply_stl_vector(uout, uin0, uin1); + volk::cppmultiply_pointers(uout.data(), in0.data(), in1.data(), num_points); + + for (int ii = 0; ii < num_points; ++ii) { + fc32 v0 = in0[ii]; + fc32 v1 = in1[ii]; + fc32 o = out[ii]; + + fmt::print( + "in0=({:+.1f}{:+.1f}j), in1=({:+.1f}{:+.1f}j), out=({:+.1f}{:+.1f}j)\n", + std::real(v0), + std::imag(v0), + std::real(v1), + std::imag(v1), + std::real(o), + std::imag(o)); + } +} + +void function_test_with_scalar(int num_points) +{ + std::vector uin0(fill_vector(num_points, 0.3f)); + volk::vector in0(uin0.begin(), uin0.end()); + fc32 scalar{ 0.5f, 4.3f }; + std::vector uout(num_points); + volk::vector out(num_points); + + volk::cppscalarmultiply_aligned_vector(out, in0, scalar); + + volk::cppscalarmultiply_stl_vector(uout, uin0, scalar); + volk::cppscalarmultiply_pointers(uout.data(), in0.data(), scalar, num_points); + + fmt::print("scalar=({:+.1f}{:+.1f}j)\n", std::real(scalar), std::imag(scalar)); + for (int ii = 0; ii < num_points; ++ii) { + fc32 v0 = in0[ii]; + fc32 o = out[ii]; + + fmt::print("in0=({:+.1f}{:+.1f}j), out=({:+.1f}{:+.1f}j)\n", + std::real(v0), + std::imag(v0), + std::real(o), + std::imag(o)); + } +} + +int main(int argc, char* argv[]) +{ + fmt::print("Vector function test\n"); + function_test_vectors(16); + + fmt::print("Scalar function test\n"); + function_test_with_scalar(16); + + lv_32fc_t fc_cpl[4]; + fmt::print("float={}, complex float={}, complex float array[4]={}\n", + sizeof(float), + sizeof(lv_32fc_t), + sizeof(fc_cpl)); + + + std::vector vec(4); + for (int i = 0; i < 4; i++) { + auto foo = std::complex((i + 3), (i + 8)); + fmt::print("std::complex: ({:+.1f}{:+.1f}j)\n", std::real(foo), std::imag(foo)); + lv_32fc_t bar = lv_32fc_t{ 5, 6 }; + vec.at(i) = bar; + } + + for (auto& val : vec) { + float r = __real__ val; + float i = __imag__ val; + fmt::print("sizeof(val)={}, {:+.1f}{:+.1f}j\n", sizeof(val), r, i); + } +} diff --git a/tmpl/volk_typedefs.tmpl.h b/tmpl/volk_typedefs.tmpl.h index 2600c642c..a1dad61a7 100644 --- a/tmpl/volk_typedefs.tmpl.h +++ b/tmpl/volk_typedefs.tmpl.h @@ -10,11 +10,17 @@ #ifndef INCLUDED_VOLK_TYPEDEFS #define INCLUDED_VOLK_TYPEDEFS + #include #include +#include + +__VOLK_DECL_BEGIN %for kern in kernels: typedef void (*${kern.pname})(${kern.arglist_types}); %endfor +__VOLK_DECL_END + #endif /*INCLUDED_VOLK_TYPEDEFS*/