Define xnn_float16 to be _Float16 if we detect that we are compiling in an environment that supports it fully. Add explicit casts in a handful of necessary cases.

PiperOrigin-RevId: 689418297
xnnpack-bot committed Oct 24, 2024
1 parent 79a0d9c commit c9ba46b
Showing 12 changed files with 116 additions and 42 deletions.
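
Why the explicit casts are needed: once xnn_float16 is a native _Float16 rather than a struct wrapping a uint16_t, a few idioms in the benches and tests no longer compile cleanly. A minimal sketch of the failure modes, assuming a compiler with full _Float16 support (e.g. recent Clang or GCC on x86-64 with SSE2); this sketch is illustrative and not code from the commit:

#include <cmath>
#include <iostream>

typedef _Float16 xnn_float16;  // as defined in src/xnnpack/math.h when XNN_HAVE_FLOAT16 is set

int main() {
  // Brace-initializing a _Float16 from a float is treated as a narrowing
  // conversion, so an explicit cast is required (hence the casts added below).
  xnn_float16 output[2] = {(xnn_float16)std::nanf(""), (xnn_float16)std::nanf("")};

  // std::isinf, std::exp, std::copysign and operator<< are overloaded for
  // float/double/long double but (pre-C++23) not for _Float16, so such calls
  // can be ambiguous or simply missing depending on the toolchain; promoting
  // explicitly to float resolves that.
  std::cout << std::isinf((float)output[0]) << " " << (float)output[1] << "\n";
  return 0;
}
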
2 changes: 1 addition & 1 deletion bench/f16-rminmax.cc
@@ -43,7 +43,7 @@ static void f16_rminmax(
init_params(&params);
}

xnn_float16 output[2] = {std::nanf(""), std::nanf("")};
xnn_float16 output[2] = {(xnn_float16)std::nanf(""), (xnn_float16)std::nanf("")};
for (auto _ : state) {
rminmax(elements * sizeof(xnn_float16), input.data(), output, &params);
}
57 changes: 57 additions & 0 deletions src/xnnpack/math.h
@@ -426,6 +426,46 @@ XNN_INLINE static uint16_t math_cvt_bf16_fp32(float x) {
} // extern "C"
#endif

// We want to use _Float16 if the compiler supports it fully, but it's
// tricky to do this detection; there are compiler versions that "support" it
// as a type but without any operations on it (i.e., it just supports conversion
// to/from float32). We're only going to bother using it if the support is
// mostly complete, which generally means a recent version of Clang or GCC,
// x86 or ARM architectures, and (in some cases) the right architecture
// flags specified on the command line.

#ifndef XNN_HAVE_FLOAT16

// Some non-GCC compilers define __GNUC__, but we only want to detect the Real
// Thing
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && \
!defined(__INTEL_LLVM_COMPILER)
#define XNN_GNUC_ACTUAL __GNUC__
#else
#define XNN_GNUC_ACTUAL 0
#endif

#if (defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__) && \
defined(__FLT16_MAX__) && \
((__clang_major__ >= 15 && !defined(_MSC_VER)) || (XNN_GNUC_ACTUAL >= 12))
#define XNN_HAVE_FLOAT16 1
#endif

#if (defined(__aarch64__) && !defined(_MSC_VER)) && \
((defined(__clang__) && (__clang_major__ >= 15)) || \
(XNN_GNUC_ACTUAL >= 13))
#define XNN_HAVE_FLOAT16 1
#endif

#if defined(__riscv) && defined(__riscv_zvfh) && __clang__ >= 1600
#define XNN_HAVE_FLOAT16 1
#endif

#endif // XNN_HAVE_FLOAT16

#ifdef XNN_HAVE_FLOAT16
typedef _Float16 xnn_float16;
#else
// We want float16s to be a distinct type from uint16_t, to avoid accidental
// reinterpret casts as integers. This type is designed to produce errors when
// using it as an arithmetic type in C, and designed to emulate a native float16
@@ -441,6 +481,7 @@ struct xnn_float16 {
#endif
};
typedef struct xnn_float16 xnn_float16;
#endif

struct xnn_bfloat16 {
uint16_t value;
@@ -460,13 +501,21 @@ extern "C" {
#endif

XNN_INLINE static xnn_float16 xnn_float16_from_float(float f) {
#ifdef XNN_HAVE_FLOAT16
return f;
#else
struct xnn_float16 result;
result.value = fp16_ieee_from_fp32_value(f);
return result;
#endif
}

XNN_INLINE static float xnn_float16_to_float(xnn_float16 fp16) {
#ifdef XNN_HAVE_FLOAT16
return (float) fp16;
#else
return fp16_ieee_to_fp32_value(fp16.value);
#endif
}

XNN_INLINE static xnn_bfloat16 xnn_bfloat16_from_float(float f) {
@@ -480,14 +529,22 @@ XNN_INLINE static float xnn_bfloat16_to_float(xnn_bfloat16 bf16) {
}

XNN_INLINE static xnn_float16 xnn_float16_zero() {
#ifdef XNN_HAVE_FLOAT16
return 0.0f;
#else
struct xnn_float16 result;
result.value = 0;
return result;
#endif
}

XNN_INLINE static bool xnn_float16_is_zero(xnn_float16 f) {
#ifdef XNN_HAVE_FLOAT16
return f == 0.0f || f == -0.0f;
#else
// Check for +/- zero (0x0000/0x8000). uint16 overflow is well defined to wrap around.
return f.value * 2 == 0;
#endif
}

#ifdef __cplusplus
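
Because the whole detection block is wrapped in #ifndef XNN_HAVE_FLOAT16, a build can presumably force either mode by predefining that macro. Either way, call sites are expected to go through the helpers rather than doing arithmetic on xnn_float16 directly. A small usage sketch (illustrative only; assumes the helpers above and that the XNNPACK include path resolves xnnpack/math.h):

#include <cstddef>
#include <cstdio>
#include "xnnpack/math.h"  // xnn_float16 and the conversion helpers shown above

static float scale_and_accumulate(const xnn_float16* x, size_t n, float scale) {
  float acc = 0.0f;
  for (size_t i = 0; i < n; i++) {
    // All arithmetic happens in float, so this compiles whether xnn_float16
    // is a native _Float16 or the emulation struct.
    acc += xnn_float16_to_float(x[i]) * scale;
  }
  return acc;
}

int main() {
  xnn_float16 values[3];
  for (int i = 0; i < 3; i++) {
    values[i] = xnn_float16_from_float(0.5f * (float)(i + 1));  // 0.5, 1.0, 1.5
  }
  printf("sum = %f\n", scale_and_accumulate(values, 3, 2.0f));            // 6.0
  printf("zero? %d\n", xnn_float16_is_zero(xnn_float16_zero()) ? 1 : 0);  // 1
  return 0;
}
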
2 changes: 1 addition & 1 deletion test/convolution-operator-tester.h
@@ -2385,7 +2385,7 @@ class ConvolutionOperatorTester {
// FP16 saturates, it's the nature of the beast. If both reference and
// actual are infinity, then consider the output to be correct.
const bool reference_infinity = std::isinf(output_ref[(((i * oh + y) * ow + x) * groups() + g) * group_output_channels() + c]);
const bool actual_infinity = std::isinf(output[((i * oh + y) * ow + x) * output_channel_stride() + g * group_output_channels() + c]);
const bool actual_infinity = std::isinf((float)output[((i * oh + y) * ow + x) * output_channel_stride() + g * group_output_channels() + c]);
const float tolerance = std::max(1.0e-4f, std::abs(output_ref[(((i * oh + y) * ow + x) * groups() + g) * group_output_channels() + c]) * 2.0e-2f);
if (reference_infinity && actual_infinity) {
continue;
18 changes: 9 additions & 9 deletions test/fully-connected-operator-tester.h
@@ -1277,8 +1277,8 @@ class FullyConnectedOperatorTester {
int32_t c_ref_acc = 0;
for (size_t ki = 0; ki < block_size(); ki++) {
const size_t k_index = bi * block_size() + ki;
const size_t nb_index = transpose_weights() ?
(k_index * kernel_stride) + (ni / 2) :
const size_t nb_index = transpose_weights() ?
(k_index * kernel_stride) + (ni / 2) :
(ni * kernel_stride) + (k_index / 2);
const size_t plane_idx = transpose_weights() ? ni : ki;
const int32_t kernel_value = int32_t((plane_idx % 2 == 0) ? (kernel[nb_index] & UINT8_C(0xF)) : (kernel[nb_index] >> 4)) - kernel_zero_point();
@@ -1340,14 +1340,14 @@
}

const xnn_status status = xnn_create_fully_connected_nc_qp8_f32_qb4w(
input_channels(), output_channels(),
input_stride(), output_stride(),
input_channels(), output_channels(),
input_stride(), output_stride(),
block_size(),
kernel_zero_point(),
kernel_scale2d.data(),
kernel_zero_point(),
kernel_scale2d.data(),
kernel.data(),
has_bias() ? bias.data() : nullptr,
output_min, output_max,
has_bias() ? bias.data() : nullptr,
output_min, output_max,
transpose_weights() ? XNN_FLAG_TRANSPOSE_WEIGHTS : 0,
nullptr, auto_weights_cache.get(), &fully_connected_op);
if (status == xnn_status_unsupported_hardware) {
@@ -3194,7 +3194,7 @@ class FullyConnectedOperatorTester {
// FP16 overflows, it's the nature of the beast. If both reference and
// actual are infinity, then consider the output to be correct.
const bool reference_infinity = std::isinf(output_ref[i * output_channels() + c]);
const bool actual_infinity = std::isinf(output[i * output_stride() + c]);
const bool actual_infinity = std::isinf((float)output[i * output_stride() + c]);
if (reference_infinity && actual_infinity) {
continue;
}
38 changes: 19 additions & 19 deletions test/gemm-microkernel-tester.cc
@@ -233,7 +233,7 @@ void GemmMicrokernelTester::Test(
const float tolerance = std::max(1.0e-4f, std::abs(c_ref[i * n() + j]) * 1.0e-2f);
EXPECT_NEAR(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_ref[i * n() + j], tolerance)
<< "at " << i << ", " << j << ": reference = " << c_ref[i * n() + j]
<< "), optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< "), optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< nr() << " x " << kr() << ", M x N x K = " << m() << " x " << n() << " x " << k();
}
}
@@ -950,7 +950,7 @@ void GemmMicrokernelTester::Test(
const float tolerance = std::max(1.0e-4f, std::abs(c_ref[i * n() + j]) * 1.0e-2f);
EXPECT_NEAR(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_ref[i * n() + j], tolerance)
<< "at " << i << ", " << j << ": reference = " << c_ref[i * n() + j]
<< "), optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< "), optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< nr() << " x " << kr() << ", M x N x K = " << m() << " x " << n() << " x " << k();
}
}
@@ -1219,7 +1219,7 @@ void GemmMicrokernelTester::Test(
EXPECT_NEAR(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_ref[i * n() + j], tolerance)
<< "at " << i << ", " << j << ": reference = " << c_ref[i * n() + j]
<< " (accumulator = " << acc[i * n() + j]
<< "), optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< "), optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< nr() << " x " << kr() << ", M x N x K = " << m() << " x " << n() << " x " << k();
}
}
@@ -1381,7 +1381,7 @@ void GemmMicrokernelTester::Test(
const float tolerance = std::max(1.0e-4f, std::abs(c_ref[i * n() + j]) * 1.0e-3f);
EXPECT_NEAR(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_ref[i * n() + j], tolerance)
<< "at " << i << ", " << j << ": reference = " << c_ref[i * n() + j]
<< ", optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< ", optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< nr() << " x " << kr() << ", M x N x K = " << m() << " x " << n() << " x " << k2;
}
}
@@ -2229,9 +2229,9 @@ void GemmMicrokernelTester::Test(

std::fill(packed_w.begin(), packed_w.end(), 0);
pack(/*g=*/1, n(), k(), nr(), kr(), sr(),
reinterpret_cast<const uint16_t*>(b.data()),
reinterpret_cast<const uint16_t*>(b.data()),
reinterpret_cast<const uint16_t*>(bias.data()), /*scale=*/nullptr,
reinterpret_cast<uint16_t*>(packed_w.data()),
reinterpret_cast<uint16_t*>(packed_w.data()),
/*extra_bytes=*/0, /*params=*/nullptr);

for (size_t m_index = 0; m_index < m(); m_index++) {
@@ -2292,7 +2292,7 @@ void GemmMicrokernelTester::Test(

xnnpack::ReplicableRandomDevice rng;
auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));

xnnpack::Buffer<xnn_float16> a((m() - 1) * a_stride() + k() + XNN_EXTRA_BYTES / sizeof(xnn_float16));
xnnpack::Buffer<xnn_float16> b(n() * k());
xnnpack::Buffer<xnn_float16, XNN_ALLOCATION_ALIGNMENT> packed_w(
@@ -2309,10 +2309,10 @@ void GemmMicrokernelTester::Test(

std::fill(packed_w.begin(), packed_w.end(), 0);
pack(/*g=*/1, n(), k(), nr(), kr(), sr(),
reinterpret_cast<const uint16_t*>(b.data()),
reinterpret_cast<const uint16_t*>(bias.data()),
/*scale=*/nullptr,
reinterpret_cast<uint16_t*>(packed_w.data()),
reinterpret_cast<const uint16_t*>(b.data()),
reinterpret_cast<const uint16_t*>(bias.data()),
/*scale=*/nullptr,
reinterpret_cast<uint16_t*>(packed_w.data()),
/*extra_bytes=*/0, /*params=*/nullptr);

for (size_t m_index = 0; m_index < m(); m_index++) {
@@ -2355,7 +2355,7 @@ void GemmMicrokernelTester::Test(
for (size_t j = 0; j < n(); j++) {
EXPECT_NEAR(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_ref[i * n() + j], std::max(1.0e-4f, std::abs(c_ref[i * n() + j]) * 1.0e-2f))
<< "at " << i << ", " << j << ": reference = " << c_ref[i * n() + j]
<< ", optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< ", optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< " x " << kr() << ", M x N x K = " << m() << " x " << n() << " x " << k();
}
}
@@ -2371,7 +2371,7 @@ void GemmMicrokernelTester::Test(

xnnpack::ReplicableRandomDevice rng;
auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));

xnnpack::Buffer<xnn_float16> a((mr() - 1) * a_stride() + k() + XNN_EXTRA_BYTES / sizeof(xnn_float16));
xnnpack::Buffer<xnn_float16> b(n() * ks() * k());
xnnpack::Buffer<xnn_float16, XNN_ALLOCATION_ALIGNMENT> packed_w(
@@ -2390,9 +2390,9 @@ void GemmMicrokernelTester::Test(

std::fill(packed_w.begin(), packed_w.end(), 0);
pack(/*g=*/1, n(), ks(), k(), nr(), kr(), sr(),
reinterpret_cast<const uint16_t*>(b.data()),
reinterpret_cast<const uint16_t*>(bias.data()), /*scale=*/nullptr,
reinterpret_cast<uint16_t*>(packed_w.data()),
reinterpret_cast<const uint16_t*>(b.data()),
reinterpret_cast<const uint16_t*>(bias.data()), /*scale=*/nullptr,
reinterpret_cast<uint16_t*>(packed_w.data()),
/*extra_bytes=*/0, /*params=*/nullptr);

for (size_t ks_index = 0; ks_index < ks(); ks_index++) {
@@ -2469,15 +2469,15 @@ void GemmMicrokernelTester::Test(
for (size_t j = 0; j < n(); j++) {
EXPECT_LE(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_max)
<< "at " << i << ", " << i << ": reference = " << c_ref[i * n() + j]
<< ", optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< ", optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< " x " << kr() << ", M x N x KC x KS = " << m() << " x " << n() << " x " << k() << " x " << ks();
EXPECT_GE(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_min)
<< "at " << i << ", " << i << ": reference = " << c_ref[i * n() + j]
<< ", optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< ", optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< " x " << kr() << ", M x N x KC x KS = " << m() << " x " << n() << " x " << k() << " x " << ks();
EXPECT_NEAR(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_ref[i * n() + j], std::max(1.0e-4f, std::abs(c_ref[i * n() + j]) * 1.0e-2f))
<< "at " << i << ", " << i << ": reference = " << c_ref[i * n() + j]
<< ", optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< ", optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< " x " << kr() << ", M x N x KC x KS = " << m() << " x " << n() << " x " << k() << " x " << ks();
}
}
2 changes: 1 addition & 1 deletion test/reduce-microkernel-tester.h
@@ -70,7 +70,7 @@ class ReduceMicrokernelTester {
}

// Call optimized micro-kernel.
xnn_float16 output[2] = {std::nanf(""), std::nanf("")};
xnn_float16 output[2] = {(xnn_float16)std::nanf(""), (xnn_float16)std::nanf("")};
reduce(batch_size() * sizeof(xnn_float16), input.data(), output, init_params != nullptr ? &params : nullptr);

// Verify results.
4 changes: 2 additions & 2 deletions test/softmax-operator-tester.h
@@ -130,10 +130,10 @@ class SoftMaxOperatorTester {
for (size_t i = 0; i < batch_size(); i++) {
float sum_exp = 0.0;
for (size_t c = 0; c < channels(); c++) {
sum_exp += std::exp(input[i * input_stride() + c]);
sum_exp += std::exp((float)input[i * input_stride() + c]);
}
for (size_t c = 0; c < channels(); c++) {
output_ref[i * channels() + c] = std::exp(input[i * input_stride() + c]) / sum_exp;
output_ref[i * channels() + c] = std::exp((float)input[i * input_stride() + c]) / sum_exp;
}
}

8 changes: 6 additions & 2 deletions test/static-constant-pad.cc
@@ -9,6 +9,7 @@
#include <cstddef> // For size_t.
#include <cstdint>
#include <memory> // For std::unique_ptr.
#include <numeric>
#include <random> // For std::uniform_real_distribution.
#include <vector> // For std::vector.

@@ -137,8 +138,11 @@ TEST_F(StaticConstantPadTestF16, define)
std::array<size_t, XNN_MAX_TENSOR_DIMS> post_paddings;
std::fill(pre_paddings.begin(), pre_paddings.begin() + dims.size(), dim_dist(rng));
std::fill(post_paddings.begin(), post_paddings.begin() + dims.size(), dim_dist(rng));
xnn_float16 padding_value = f32dist(rng);
uint32_t padding_value_as_bits = padding_value.value;
union {
xnn_float16 padding_value;
uint16_t padding_value_as_bits;
};
padding_value = f32dist(rng);

ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

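
A note on the union above: writing padding_value and then reading padding_value_as_bits is the usual C idiom for reinterpreting the bits, and it avoids relying on a .value member that no longer exists when xnn_float16 is a native _Float16. Strict C++ does not sanction reading the inactive member of a union, although the major compilers support it; a memcpy-based sketch (illustrative, not part of the commit) sidesteps the question:

#include <cstdint>
#include <cstring>

// Assumes xnn_float16 from xnnpack/math.h; both the emulation struct and a
// native _Float16 are 16 bits wide, which the static_assert checks.
static uint16_t float16_bits(xnn_float16 value) {
  static_assert(sizeof(value) == sizeof(uint16_t), "xnn_float16 must be 16 bits");
  uint16_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits;
}

// Usage: uint16_t padding_value_as_bits = float16_bits(padding_value);
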
2 changes: 1 addition & 1 deletion test/unary-operator-tester.h
@@ -192,7 +192,7 @@ class UnaryOperatorTester {
EXPECT_NEAR(y_ref, y, AbsTolF16(y_ref))
<< "at batch " << batch << " / " << batch_size() << ", channel "
<< channel << " / " << channels() << ", input "
<< input;
<< (float)input;
}
virtual void CheckResultQS8(int8_t y, float y_ref, size_t batch,
size_t channel, int8_t input) const {
17 changes: 15 additions & 2 deletions test/vbinary-microkernel-tester.h
@@ -45,10 +45,10 @@ class VBinaryMicrokernelTester {
result[i] = a[i] + b[i * stride_b];
break;
case OpType::CopySign:
result[i] = std::copysign(a[i], b[i * stride_b]);
result[i] = copy_sign(a[i], b[i * stride_b]);
break;
case OpType::RCopySign:
result[i] = std::copysign(b[i * stride_b], a[i]);
result[i] = copy_sign(b[i * stride_b], a[i]);
break;
case OpType::Div:
result[i] = a[i] / b[i * stride_b];
@@ -231,6 +231,19 @@ class VBinaryMicrokernelTester {
uint8_t qmin_{0};
uint8_t qmax_{255};
size_t iterations_{15};

static float copy_sign(float a, float b) {
return std::copysign(a, b);
}

static int32_t copy_sign(int32_t a, int32_t b) {
return (int32_t)std::copysign((float)a, (float)b);
}

static xnn_float16 copy_sign(xnn_float16 a, xnn_float16 b) {
return (xnn_float16)std::copysign((float)a, (float)b);
}

};

#define XNN_TEST_BINARY_BATCH_EQ(ukernel, arch_flags, batch_tile, is_binaryc, \
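
The copy_sign overloads exist because the reference loop above is shared across element types, and std::copysign is not overloaded for _Float16 on the toolchains targeted here, hence the explicit round-trip through float. A standalone sketch of the same idea (hypothetical helpers, assuming native _Float16 support; not code from the commit):

#include <cmath>
#include <cstdio>

static float copy_sign(float a, float b) { return std::copysign(a, b); }
static _Float16 copy_sign(_Float16 a, _Float16 b) {
  // Promote to float, copy the sign there, and narrow back to half precision.
  return (_Float16)std::copysign((float)a, (float)b);
}

int main() {
  _Float16 a = (_Float16)1.5f;
  _Float16 b = (_Float16)-2.0f;
  // Same call spelling for both element types; prints "-1.5 -1.5".
  printf("%g %g\n", (double)copy_sign(1.5f, -2.0f), (double)(float)copy_sign(a, b));
  return 0;
}
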
6 changes: 3 additions & 3 deletions test/vcvt-microkernel-tester.cc
@@ -47,7 +47,7 @@ void VCvtMicrokernelTester::Test(
ASSERT_EQ(float_as_uint32(output[i]),
float_as_uint32(input[i]))
<< "at " << i << " / " << batch_size() << ", x[" << i << "] = 0x"
<< std::hex << std::setw(4) << std::setfill('0') << input[i];
<< std::hex << std::setw(4) << std::setfill('0') << (float) input[i];
}
}
}
@@ -121,8 +121,8 @@ void VCvtMicrokernelTester::Test(xnn_f16_qs8_vcvt_ukernel_fn vcvt,
static_cast<int32_t>(output_ref[i]), 1)
<< "at " << i << " / " << batch_size() << ", x[" << i << "] = 0x"
<< std::hex << std::setw(8) << std::setfill('0')
<< float_as_uint32(input[i]) << " (" << input[i] << ")" << " INPUT "
<< input[i] << " scale " << scale() << " zp "
<< float_as_uint32(input[i]) << " (" << (float)input[i] << ")" << " INPUT "
<< (float)input[i] << " scale " << scale() << " zp "
<< (int)output_zero_point();
}
}
2 changes: 1 addition & 1 deletion test/vunary-microkernel-tester.h
@@ -321,7 +321,7 @@ class VUnaryMicrokernelTester {
for (size_t i = 0; i < batch_size(); i++) {
ASSERT_NEAR(y[i], y_ref[i], tol(y_ref[i]))
<< "at " << i << " / " << batch_size() << ", x[" << i
<< "] = " << std::scientific << x[i];
<< "] = " << std::scientific << (float)x[i];
}
}
}
