diff --git a/.travis.yml b/.travis.yml
index a08d2351c..aac635553 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -56,6 +56,16 @@ matrix:
       env: MATRIX_EVAL="CC=clang && CXX=clang++"
       addons: {apt: {packages: [*common_packages, ]}}
 
+    - name: Linux s390x GCC 7
+      arch: s390x
+      env: MATRIX_EVAL="CC=gcc-7 && CXX=g++-7"
+      addons: {apt: {packages: [*common_packages, ]}}
+
+    - name: Linux ppc64le GCC 7
+      arch: ppc64le
+      env: MATRIX_EVAL="CC=gcc-7 && CXX=g++-7"
+      addons: {apt: {packages: [*common_packages, ]}}
+
 script:
   - eval "${MATRIX_EVAL}"
   - lscpu
diff --git a/compile.sh b/compile.sh
new file mode 100755
index 000000000..0af7ef7c1
--- /dev/null
+++ b/compile.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# Build the VOLK demo programs: main.c with gcc and clang, main.cc with g++.
+# Set VOLK_ROOT to the VOLK source tree (its build is expected in $VOLK_ROOT/build).
+VOLK_ROOT="${VOLK_ROOT:-/home/johannes/src/volk}"
+gcc -std=c17 -I"${VOLK_ROOT}/include" -I"${VOLK_ROOT}/build/include" -L"${VOLK_ROOT}/build/lib" -x c main.c -o mainvolkgnuc -lm -lvolk
+clang -std=c17 -I"${VOLK_ROOT}/include" -I"${VOLK_ROOT}/build/include" -L"${VOLK_ROOT}/build/lib" -x c main.c -o mainvolkclangc -lm -lvolk
+g++ -std=c++17 -I"${VOLK_ROOT}/include" -I"${VOLK_ROOT}/build/include" -L"${VOLK_ROOT}/build/lib" -x c++ main.cc -o mainvolkcpp -lm -lfmt -lvolk
diff --git a/include/volk/volk_common.h b/include/volk/volk_common.h
index 70b94cbdd..d7bde1c5d 100644
--- a/include/volk/volk_common.h
+++ b/include/volk/volk_common.h
@@ -85,8 +85,9 @@
 ////////////////////////////////////////////////////////////////////////
 // C-linkage declaration macros
 // FIXME: due to the usage of complex.h, require gcc for c-linkage
+// The __GNUC__ check was dropped: extern "C" is assumed to work on all relevant compilers.
 ////////////////////////////////////////////////////////////////////////
-#if defined(__cplusplus) && (__GNUC__)
+#if defined(__cplusplus)
 #define __VOLK_DECL_BEGIN extern "C" {
 #define __VOLK_DECL_END }
 #else
diff --git a/include/volk/volk_complex.h b/include/volk/volk_complex.h
index 4d0efc4ba..96d99405f 100644
--- a/include/volk/volk_complex.h
+++ b/include/volk/volk_complex.h
@@ -26,55 +26,61 @@
  * - lv_conj - take the conjugate of the complex number
  */
 
-#ifdef __cplusplus
-
-#include <stdint.h>
-#include <complex>
-
-typedef std::complex<int8_t> lv_8sc_t;
-typedef std::complex<int16_t> lv_16sc_t;
-typedef std::complex<int32_t> lv_32sc_t;
-typedef std::complex<int64_t> lv_64sc_t;
-typedef std::complex<float> lv_32fc_t;
-typedef std::complex<double> lv_64fc_t;
-
-template <typename T>
-inline std::complex<T> lv_cmake(const T& r, const T& i)
-{
-    return std::complex<T>(r, i);
-}
-
-template <typename T>
-inline typename T::value_type lv_creal(const T& x)
-{
-    return x.real();
-}
-
-template <typename T>
-inline typename T::value_type lv_cimag(const T& x)
-{
-    return x.imag();
-}
-
-template <typename T>
-inline T lv_conj(const T& x)
-{
-    return std::conj(x);
-}
-
-#else /* __cplusplus */
-
 #include <complex.h>
-#include <tgmath.h>
-
-typedef char complex lv_8sc_t;
-typedef short complex lv_16sc_t;
-typedef long complex lv_32sc_t;
-typedef long long complex lv_64sc_t;
-typedef float complex lv_32fc_t;
-typedef double complex lv_64fc_t;
+#include <volk/volk_common.h>
+
+__VOLK_DECL_BEGIN
+#ifndef _MSC_VER
+// `typedef float complex lv_32fc_t` would be the natural C spelling, but it
+// clashes with C++ definitions and fails to compile there:
+// error: expected initializer before ‘lv_32fc_t’
+//    --> typedef float complex lv_32fc_t;
+// Hence the `_Complex` keyword is used. https://stackoverflow.com/a/10540302
+
+typedef char _Complex lv_8sc_t;
+typedef short _Complex lv_16sc_t;
+typedef long _Complex lv_32sc_t;
+typedef long long _Complex lv_64sc_t;
+typedef float _Complex lv_32fc_t;
+typedef double _Complex lv_64fc_t;
+
+#else
+// MSVC requires different treatment.
+// https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=msvc-160
+// https://docs.microsoft.com/en-us/cpp/c-runtime-library/complex-math-support?view=msvc-160
+// Refer to `complex.h` in
+// https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk/
+// https://github.com/microsoft/STL/blob/main/stl/inc/complex
+
+typedef _Fcomplex lv_32fc_t;
+typedef _Dcomplex lv_64fc_t;
+
+// typedef char _Complex lv_8sc_t;
+typedef struct lv_8sc_t {
+    char _Val[2];
+} lv_8sc_t;
+
+// typedef short _Complex lv_16sc_t;
+typedef struct lv_16sc_t {
+    short _Val[2];
+} lv_16sc_t;
+
+// typedef long _Complex lv_32sc_t;
+typedef struct lv_32sc_t {
+    long _Val[2];
+} lv_32sc_t;
+
+// typedef long long _Complex lv_64sc_t;
+typedef struct lv_64sc_t {
+    long long _Val[2];
+} lv_64sc_t;
+#endif
 
 #define lv_cmake(r, i) ((r) + _Complex_I * (i))
+// `_Imaginary_I` would be preferable because it ensures the correct sign, see
+// https://en.cppreference.com/w/c/numeric/complex/Imaginary_I
+// It is not used because the following definition does not compile:
+// #define lv_cmake(r, i) ((r) + _Imaginary_I * (i))
 
 // When GNUC is available, use the complex extensions.
 // The extensions always return the correct value type.
@@ -93,6 +99,7 @@ typedef double complex lv_64fc_t;
 // with type-generic versions.
 #else /* __GNUC__ */
 
+
 #define lv_creal(x) (creal(x))
 
 #define lv_cimag(x) (cimag(x))
@@ -101,6 +108,6 @@ typedef double complex lv_64fc_t;
 
 #endif /* __GNUC__ */
 
-#endif /* __cplusplus */
+__VOLK_DECL_END
 
 #endif /* INCLUDE_VOLK_COMPLEX_H */
diff --git a/lib/kernel_tests.h b/lib/kernel_tests.h
index dc3484127..dbac3084d 100644
--- a/lib/kernel_tests.h
+++ b/lib/kernel_tests.h
@@ -42,7 +42,9 @@ std::vector<volk_test_case_t> init_test_list(volk_test_params_t test_params)
     test_params_power.set_scalar(2.5);
 
     volk_test_params_t test_params_rotator(test_params);
-    test_params_rotator.set_scalar(std::polar(1.0f, 0.1f));
+    auto rotator_value = std::polar(1.0f, 0.1f);
+    test_params_rotator.set_scalar(
+        lv_32fc_t{ rotator_value.real(), rotator_value.imag() });
     test_params_rotator.set_tol(1e-3);
 
     std::vector<volk_test_case_t> test_cases;
diff --git a/lib/qa_utils.cc b/lib/qa_utils.cc
index 378d544d1..1f0a47162 100644
--- a/lib/qa_utils.cc
+++ b/lib/qa_utils.cc
@@ -636,7 +636,7 @@ bool run_volk_tests(volk_func_desc_t desc,
                 } else {
                     run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func),
                                         test_data[i],
-                                        scalar.real(),
+                                        __real__ scalar,
                                         vlen,
                                         iter,
                                         arch_list[i]);
@@ -659,7 +659,7 @@ bool run_volk_tests(volk_func_desc_t desc,
                 } else {
                     run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func),
                                         test_data[i],
-                                        scalar.real(),
+                                        __real__ scalar,
                                         vlen,
                                         iter,
                                         arch_list[i]);
@@ -682,7 +682,7 @@ bool run_volk_tests(volk_func_desc_t desc,
                 } else {
                     run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func),
                                         test_data[i],
-                                        scalar.real(),
+                                        __real__ scalar,
                                         vlen,
                                         iter,
                                         arch_list[i]);
diff --git a/lib/volk_rank_archs.h b/lib/volk_rank_archs.h
index 0a6c2e117..e8ae1a3df 100644
--- a/lib/volk_rank_archs.h
+++ b/lib/volk_rank_archs.h
@@ -12,10 +12,9 @@
 
 #include <stdbool.h>
 #include <stdlib.h>
+#include <volk/volk_common.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
+__VOLK_DECL_BEGIN
 
 int volk_get_index(const char* impl_names[], // list of implementations by name
                    const size_t n_impls,     // number of implementations available
@@ -30,7 +29,6 @@ int volk_rank_archs(const char* kern_name,    // name of the kernel to rank
                     const bool align          // if false, filter aligned implementations
 );
 
-#ifdef __cplusplus
-}
-#endif
+__VOLK_DECL_END
+
 #endif /*INCLUDED_VOLK_RANK_ARCHS_H*/
diff --git a/main.c b/main.c
new file mode 100644
index 000000000..de3e69a5b
--- /dev/null
+++ b/main.c
@@ -0,0 +1,98 @@
+
+#include <math.h>
+#include <stdio.h>
+#include <volk/volk.h>
+
+/*
+ * Fill two buffers with complex tones, run volk_32fc_x2_multiply_32fc on them
+ * and print every input pair next to its output. Does nothing when
+ * num_points <= 0; reports to stderr and skips the kernel if allocation fails.
+ */
+void function_test(int num_points)
+{
+    if (num_points <= 0) {
+        return; // a negative count would wrap around once converted to unsigned
+    }
+    const unsigned int count = (unsigned int)num_points;
+    unsigned int alignment = volk_get_alignment();
+    lv_32fc_t* in0 = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t) * count, alignment);
+    lv_32fc_t* in1 = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t) * count, alignment);
+    lv_32fc_t* out = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t) * count, alignment);
+    if (in0 != NULL && in1 != NULL && out != NULL) {
+        for (unsigned int ii = 0; ii < count; ++ii) {
+            // Generate two tones
+            in0[ii] = lv_cmake(cosf(0.3f * (float)ii), sinf(0.3f * (float)ii));
+            in1[ii] = lv_cmake(cosf(0.1f * (float)ii), sinf(0.1f * (float)ii));
+        }
+        volk_32fc_x2_multiply_32fc(out, in0, in1, count);
+        for (unsigned int ii = 0; ii < count; ++ii) {
+            printf("in0=(%+.1f%+.1fj), in1=(%+.1f%+.1fj), out=(%+.1f%+.1fj)\n",
+                   creal(in0[ii]), cimag(in0[ii]),
+                   creal(in1[ii]), cimag(in1[ii]),
+                   creal(out[ii]), cimag(out[ii]));
+        }
+    } else {
+        fprintf(stderr, "function_test: volk_malloc failed\n");
+    }
+    // Release only the buffers that were actually allocated.
+    if (in0 != NULL) volk_free(in0);
+    if (in1 != NULL) volk_free(in1);
+    if (out != NULL) volk_free(out);
+}
+
+/*
+ * Demo driver: run function_test, then print sizes and basic arithmetic for
+ * lv_32fc_t and lv_8sc_t. `~x` (conjugate), __real__ and __imag__ are GNU C
+ * extensions, so this file is expected to build with gcc or clang only.
+ */
+int main(int argc, char* argv[])
+{
+    function_test(32);
+
+    lv_32fc_t fc_cpl[4];
+    printf("float=%zu, complex float=%zu, complex float array[4]=%zu\n",
+           sizeof(float),
+           sizeof(lv_32fc_t),
+           sizeof(fc_cpl));
+
+    for (int i = 0; i < 4; i++) {
+        fc_cpl[i] = (i + 3) + I * (i + 8);
+        // Same value again through the VOLK macro; this overwrites the line above.
+        fc_cpl[i] = lv_cmake(i + 3, i + 8);
+    }
+    for (int i = 0; i < 4; i++) {
+        lv_32fc_t val = fc_cpl[i];
+        lv_32fc_t cval = conj(val);
+        lv_32fc_t gval = ~val;
+        lv_32fc_t mult = val * val;
+        printf("val      = %+.1f%+.1fj\n", creal(val), cimag(val));
+        printf("conj(val)= %+.1f%+.1fj\n", creal(cval), cimag(cval));
+        printf("gcc: ~val= %+.1f%+.1fj\n", creal(gval), cimag(gval));
+        printf("val*val  = %+.1f%+.1fj\n", creal(mult), cimag(mult));
+    }
+
+    lv_8sc_t sc_cpl[4];
+    printf("\n\nchar=%zu, complex char=%zu, complex char array[4]=%zu\n",
+           sizeof(char),
+           sizeof(lv_8sc_t),
+           sizeof(sc_cpl));
+
+    for (int i = 0; i < 4; i++) {
+        // lv_8sc_t value = (i + 3) + I * (i + 8);
+        // printf("value=%+hhi%+hhij\n", creal(value), cimag(value));
+        // sc_cpl[i] = (i + 3) + I * (i + 8);
+        sc_cpl[i] = lv_cmake(i + 3, i + 8);
+        // printf("%i + j %i\n", creal(sc_cpl[i]), cimag(sc_cpl[i]));
+    }
+    for (int i = 0; i < 4; i++) {
+        lv_8sc_t val = sc_cpl[i];
+        lv_8sc_t cval = conj(val);
+        // lv_8sc_t cval = lv_cmake(creal(val), -cimag(val));
+        lv_8sc_t gval = ~val;
+        lv_8sc_t mult = val * val;
+        printf("val      = %+hhi%+hhij\n", __real__ val, __imag__ val);
+        printf("conj(val)= %+hhi%+hhij\n", __real__ cval, __imag__ cval);
+        printf("gcc: ~val= %+hhi%+hhij\n", __real__ gval, __imag__ gval);
+        printf("val*val  = %+hhi%+hhij\n", __real__ mult, __imag__ mult);
+    }
+}
diff --git a/main.cc b/main.cc
new file mode 100644
index 000000000..59898093d
--- /dev/null
+++ b/main.cc
@@ -0,0 +1,200 @@
+#include <fmt/core.h>
+#include <algorithm>
+#include <cmath>
+#include <complex>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+
+/*
+ * These type definitions are in line with our C definitions.
+ *
+ * Alternatively, we could go with the NumPy scheme:
+ * np.complex64 aka std::complex<float>
+ * np.complex128 aka std::complex<double>
+ * The underlying types are probably defined like Ctypes.
+ * This is about the idea.
+ */
+typedef std::complex<int8_t> ic8;
+typedef std::complex<int16_t> ic16;
+typedef std::complex<int32_t> ic32;
+typedef std::complex<int64_t> ic64;
+typedef std::complex<float> fc32;
+typedef std::complex<double> fc64;
+
+#include <volk/volk.h>
+#include <volk/volk_alloc.hh>
+
+/* C++ Interface requirements
+ *
+ * 1. Make C++ STL types usable `std::vector`, `std::complex`.
+ * 2. Make aligned vectors aka `volk::vector` usable.
+ * 3. Allow call-by-pointer for GR buffer interface usage etc.
+ *
+ * These requirements result in at least 3 functions.
+ * We might want to think about fancy new C++ features e.g. concepts to consolidate these.
+ */
+
+namespace volk {
+/*
+ * volk_32fc_s32fc_multiply_32fc wrappers. The pointer version reinterprets fc32
+ * buffers as lv_32fc_t; the container versions use the shortest container length.
+ */
+void cppscalarmultiply_pointers(fc32* result,
+                                const fc32* input0,
+                                const fc32 scalar,
+                                const unsigned int num_points)
+{
+    const lv_32fc_t c_scalar{ scalar.real(), scalar.imag() };
+    auto* dst = reinterpret_cast<lv_32fc_t*>(result);
+    const auto* src = reinterpret_cast<const lv_32fc_t*>(input0);
+    volk_32fc_s32fc_multiply_32fc(dst, src, c_scalar, num_points);
+}
+
+void cppscalarmultiply_stl_vector(std::vector<fc32>& result,
+                                  const std::vector<fc32>& input0,
+                                  const fc32 scalar)
+{
+    const unsigned int count = std::min(result.size(), input0.size());
+    cppscalarmultiply_pointers(result.data(), input0.data(), scalar, count);
+}
+
+void cppscalarmultiply_aligned_vector(volk::vector<fc32>& result,
+                                      const volk::vector<fc32>& input0,
+                                      const fc32 scalar)
+{
+    const unsigned int count = std::min(result.size(), input0.size());
+    cppscalarmultiply_pointers(result.data(), input0.data(), scalar, count);
+}
+
+/*
+ * volk_32fc_x2_multiply_32fc wrappers, laid out like the scalar ones: the
+ * pointer version casts, the container versions use the shortest length.
+ */
+void cppmultiply_pointers(fc32* result,
+                          const fc32* input0,
+                          const fc32* input1,
+                          const unsigned int num_points)
+{
+    auto* dst = reinterpret_cast<lv_32fc_t*>(result);
+    const auto* lhs = reinterpret_cast<const lv_32fc_t*>(input0);
+    const auto* rhs = reinterpret_cast<const lv_32fc_t*>(input1);
+    volk_32fc_x2_multiply_32fc(dst, lhs, rhs, num_points);
+}
+
+void cppmultiply_stl_vector(std::vector<fc32>& result,
+                            const std::vector<fc32>& input0,
+                            const std::vector<fc32>& input1)
+{
+    const unsigned int count = std::min({ result.size(), input0.size(), input1.size() });
+    cppmultiply_pointers(result.data(), input0.data(), input1.data(), count);
+}
+
+void cppmultiply_aligned_vector(volk::vector<fc32>& result,
+                                const volk::vector<fc32>& input0,
+                                const volk::vector<fc32>& input1)
+{
+    const unsigned int count = std::min({ result.size(), input0.size(), input1.size() });
+    cppmultiply_pointers(result.data(), input0.data(), input1.data(), count);
+}
+} // namespace volk
+
+
+// Return num_points samples whose n-th element is
+// (cos(step_value * n), sin(step_value * n)), starting at n = 0.
+std::vector<fc32> fill_vector(int num_points, float step_value)
+{
+    std::vector<fc32> tone(num_points);
+    for (std::size_t n = 0; n < tone.size(); ++n) {
+        const float phase = step_value * static_cast<float>(n);
+        tone[n] = fc32(std::cos(phase), std::sin(phase));
+    }
+    return tone;
+}
+
+// Exercise all three volk_32fc_x2_multiply_32fc wrappers on two generated
+// inputs, then print the aligned-vector result next to those inputs.
+// num_points is forwarded to fill_vector and to the container constructors.
+void function_test_vectors(int num_points)
+{
+    const std::vector<fc32> uin0 = fill_vector(num_points, 0.3f);
+    const std::vector<fc32> uin1 = fill_vector(num_points, 0.1f);
+    const volk::vector<fc32> in0(uin0.begin(), uin0.end());
+    const volk::vector<fc32> in1(uin1.begin(), uin1.end());
+    std::vector<fc32> uout(num_points);
+    volk::vector<fc32> out(num_points);
+
+    // Same kernel three ways: aligned containers, STL containers, raw pointers.
+    volk::cppmultiply_aligned_vector(out, in0, in1);
+    volk::cppmultiply_stl_vector(uout, uin0, uin1);
+    volk::cppmultiply_pointers(uout.data(), in0.data(), in1.data(), num_points);
+
+    // uout is written twice above but never printed; only `out` is reported.
+    for (int idx = 0; idx < num_points; ++idx) {
+        const fc32& lhs = in0[idx];
+        const fc32& rhs = in1[idx];
+        const fc32& prod = out[idx];
+        fmt::print(
+            "in0=({:+.1f}{:+.1f}j), in1=({:+.1f}{:+.1f}j), out=({:+.1f}{:+.1f}j)\n",
+            lhs.real(), lhs.imag(),
+            rhs.real(), rhs.imag(),
+            prod.real(), prod.imag());
+    }
+}
+
+// Exercise all three volk_32fc_s32fc_multiply_32fc wrappers with one fixed
+// scalar and print the aligned-vector result next to its input.
+void function_test_with_scalar(int num_points)
+{
+    const fc32 scalar{ 0.5f, 4.3f };
+    const std::vector<fc32> uin0 = fill_vector(num_points, 0.3f);
+    const volk::vector<fc32> in0(uin0.begin(), uin0.end());
+    std::vector<fc32> uout(num_points);
+    volk::vector<fc32> out(num_points);
+
+    volk::cppscalarmultiply_aligned_vector(out, in0, scalar);
+    volk::cppscalarmultiply_stl_vector(uout, uin0, scalar);
+    volk::cppscalarmultiply_pointers(uout.data(), in0.data(), scalar, num_points);
+
+    fmt::print("scalar=({:+.1f}{:+.1f}j)\n", scalar.real(), scalar.imag());
+    for (int idx = 0; idx < num_points; ++idx) {
+        const fc32& sample = in0[idx];
+        const fc32& scaled = out[idx];
+        fmt::print("in0=({:+.1f}{:+.1f}j), out=({:+.1f}{:+.1f}j)\n",
+                   sample.real(),
+                   sample.imag(),
+                   scaled.real(),
+                   scaled.imag());
+    }
+}
+
+// Run both wrapper demos, then show that lv_32fc_t can be stored in a
+// std::vector and read back with the GNU __real__/__imag__ operators.
+int main(int argc, char* argv[])
+{
+    fmt::print("Vector function test\n");
+    function_test_vectors(16);
+
+    fmt::print("Scalar function test\n");
+    function_test_with_scalar(16);
+
+    lv_32fc_t fc_cpl[4];
+    fmt::print("float={}, complex float={}, complex float array[4]={}\n",
+               sizeof(float),
+               sizeof(lv_32fc_t),
+               sizeof(fc_cpl));
+
+    std::vector<lv_32fc_t> vec(4);
+    for (int i = 0; i < 4; i++) {
+        const std::complex<float> foo(i + 3, i + 8);
+        fmt::print("std::complex: ({:+.1f}{:+.1f}j)\n", foo.real(), foo.imag());
+        vec.at(i) = lv_32fc_t{ 5, 6 };
+    }
+
+    for (const auto& val : vec) {
+        const float re = __real__ val;
+        const float im = __imag__ val;
+        fmt::print("sizeof(val)={}, {:+.1f}{:+.1f}j\n", sizeof(val), re, im);
+    }
+}
diff --git a/tmpl/volk_typedefs.tmpl.h b/tmpl/volk_typedefs.tmpl.h
index 2600c642c..a1dad61a7 100644
--- a/tmpl/volk_typedefs.tmpl.h
+++ b/tmpl/volk_typedefs.tmpl.h
@@ -10,11 +10,17 @@
 #ifndef INCLUDED_VOLK_TYPEDEFS
 #define INCLUDED_VOLK_TYPEDEFS
 
+
 #include <inttypes.h>
 #include <volk/volk_complex.h>
+#include <volk/volk_common.h>
+
+__VOLK_DECL_BEGIN
 
 %for kern in kernels:
 typedef void (*${kern.pname})(${kern.arglist_types});
 %endfor
 
+__VOLK_DECL_END
+
 #endif /*INCLUDED_VOLK_TYPEDEFS*/