Skip to content

Commit

Permalink
Define xnn_float16 to be _Float16 if we detect that we are compiling …
Browse files Browse the repository at this point in the history
…in an environment that supports it fully. Add explicit casts in a handful of necessary cases.

PiperOrigin-RevId: 689418297
  • Loading branch information
xnnpack-bot committed Oct 24, 2024
1 parent 79a0d9c commit 9908073
Show file tree
Hide file tree
Showing 10 changed files with 106 additions and 32 deletions.
2 changes: 1 addition & 1 deletion bench/f16-rminmax.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ static void f16_rminmax(
init_params(&params);
}

xnn_float16 output[2] = {std::nanf(""), std::nanf("")};
xnn_float16 output[2] = {(xnn_float16)std::nanf(""), (xnn_float16)std::nanf("")};
for (auto _ : state) {
rminmax(elements * sizeof(xnn_float16), input.data(), output, &params);
}
Expand Down
57 changes: 57 additions & 0 deletions src/xnnpack/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,46 @@ XNN_INLINE static uint16_t math_cvt_bf16_fp32(float x) {
} // extern "C"
#endif

// We want to use _Float16 if the compiler supports it fully, but it's
// tricky to do this detection; there are compiler versions that "support" it
// as a type but without any operations on it (i.e., it just supports conversion
// to/from float32). We're only going to bother using it if the support is
// mostly complete, which generally means a recent version of Clang or GCC,
// x86 or ARM architectures, and (in some cases) the right architecture
// flags specified on the command line.

#ifndef XNN_HAVE_FLOAT16

// Some non-GCC compilers define __GNUC__, but we only want to detect the Real
// Thing
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && \
    !defined(__INTEL_LLVM_COMPILER)
#define XNN_GNUC_ACTUAL __GNUC__
#else
#define XNN_GNUC_ACTUAL 0
#endif

// x86/x86-64: require SSE2 plus a compiler that actually implements _Float16
// arithmetic (Clang 15+, excluding MSVC-compatible mode, or GCC 12+).
// __FLT16_MAX__ confirms the type itself is available.
#if (defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__) && \
    defined(__FLT16_MAX__) && \
    ((__clang_major__ >= 15 && !defined(_MSC_VER)) || (XNN_GNUC_ACTUAL >= 12))
#define XNN_HAVE_FLOAT16 1
#endif

// AArch64: _Float16 is fully supported by Clang 15+ and GCC 13+ (MSVC
// excluded).
#if (defined(__aarch64__) && !defined(_MSC_VER)) && \
    ((defined(__clang__) && (__clang_major__ >= 15)) || \
     (XNN_GNUC_ACTUAL >= 13))
#define XNN_HAVE_FLOAT16 1
#endif

// RISC-V: require the Zvfh extension and Clang 16 or newer.
// NOTE: the previous condition `__clang__ >= 1600` was always false, because
// __clang__ expands to 1; the version lives in __clang_major__.
#if defined(__riscv) && defined(__riscv_zvfh) && defined(__clang__) && \
    (__clang_major__ >= 16)
#define XNN_HAVE_FLOAT16 1
#endif

#endif  // XNN_HAVE_FLOAT16

#ifdef XNN_HAVE_FLOAT16
typedef _Float16 xnn_float16;
#else
// We want float16s to be a distinct type from uint16_t, to avoid accidental
// reinterpret casts as integers. This type is designed to produce errors when
// using it as an arithmetic type in C, and designed to emulate a native float16
Expand All @@ -441,6 +481,7 @@ struct xnn_float16 {
#endif
};
typedef struct xnn_float16 xnn_float16;
#endif

struct xnn_bfloat16 {
uint16_t value;
Expand All @@ -460,13 +501,21 @@ extern "C" {
#endif

// Converts a float to xnn_float16, independent of whether native _Float16
// support was detected.
XNN_INLINE static xnn_float16 xnn_float16_from_float(float f) {
#ifdef XNN_HAVE_FLOAT16
  // Native path: implicit float -> _Float16 conversion (IEEE narrowing,
  // presumably round-to-nearest-even matching the emulated path — confirm).
  return f;
#else
  // Emulated path: store the IEEE fp16 bit pattern computed from the fp32
  // value.
  struct xnn_float16 result;
  result.value = fp16_ieee_from_fp32_value(f);
  return result;
#endif
}

// Widens an xnn_float16 to float. This conversion is exact: every fp16 value
// is representable as fp32.
XNN_INLINE static float xnn_float16_to_float(xnn_float16 fp16) {
#ifdef XNN_HAVE_FLOAT16
  // Native path: plain widening cast from _Float16.
  return (float) fp16;
#else
  // Emulated path: decode the stored IEEE fp16 bit pattern.
  return fp16_ieee_to_fp32_value(fp16.value);
#endif
}

XNN_INLINE static xnn_bfloat16 xnn_bfloat16_from_float(float f) {
Expand All @@ -480,14 +529,22 @@ XNN_INLINE static float xnn_bfloat16_to_float(xnn_bfloat16 bf16) {
}

// Returns positive zero as an xnn_float16, regardless of whether the native
// _Float16 type is available.
XNN_INLINE static xnn_float16 xnn_float16_zero() {
#ifdef XNN_HAVE_FLOAT16
  return 0.0f;
#else
  // All-zero bits encode +0.0 in IEEE fp16.
  struct xnn_float16 result;
  result.value = 0;
  return result;
#endif
}

// Returns true if f is +0.0 or -0.0.
XNN_INLINE static bool xnn_float16_is_zero(xnn_float16 f) {
#ifdef XNN_HAVE_FLOAT16
  // IEEE 754 equality treats +0.0 and -0.0 as equal, so a single comparison
  // covers both.
  return f == 0.0f;
#else
  // +/-zero are 0x0000 and 0x8000; mask off the sign bit. The previous form,
  // `f.value * 2 == 0`, relied on uint16 wraparound that never happens:
  // integer promotion widens f.value to int, so 0x8000 * 2 == 0x10000 != 0
  // and negative zero was not detected as zero.
  return (f.value & 0x7FFF) == 0;
#endif
}

#ifdef __cplusplus
Expand Down
38 changes: 19 additions & 19 deletions test/gemm-microkernel-tester.cc
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ void GemmMicrokernelTester::Test(
const float tolerance = std::max(1.0e-4f, std::abs(c_ref[i * n() + j]) * 1.0e-2f);
EXPECT_NEAR(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_ref[i * n() + j], tolerance)
<< "at " << i << ", " << j << ": reference = " << c_ref[i * n() + j]
<< "), optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< "), optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< nr() << " x " << kr() << ", M x N x K = " << m() << " x " << n() << " x " << k();
}
}
Expand Down Expand Up @@ -950,7 +950,7 @@ void GemmMicrokernelTester::Test(
const float tolerance = std::max(1.0e-4f, std::abs(c_ref[i * n() + j]) * 1.0e-2f);
EXPECT_NEAR(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_ref[i * n() + j], tolerance)
<< "at " << i << ", " << j << ": reference = " << c_ref[i * n() + j]
<< "), optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< "), optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< nr() << " x " << kr() << ", M x N x K = " << m() << " x " << n() << " x " << k();
}
}
Expand Down Expand Up @@ -1219,7 +1219,7 @@ void GemmMicrokernelTester::Test(
EXPECT_NEAR(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_ref[i * n() + j], tolerance)
<< "at " << i << ", " << j << ": reference = " << c_ref[i * n() + j]
<< " (accumulator = " << acc[i * n() + j]
<< "), optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< "), optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< nr() << " x " << kr() << ", M x N x K = " << m() << " x " << n() << " x " << k();
}
}
Expand Down Expand Up @@ -1381,7 +1381,7 @@ void GemmMicrokernelTester::Test(
const float tolerance = std::max(1.0e-4f, std::abs(c_ref[i * n() + j]) * 1.0e-3f);
EXPECT_NEAR(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_ref[i * n() + j], tolerance)
<< "at " << i << ", " << j << ": reference = " << c_ref[i * n() + j]
<< ", optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< ", optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x "
<< nr() << " x " << kr() << ", M x N x K = " << m() << " x " << n() << " x " << k2;
}
}
Expand Down Expand Up @@ -2229,9 +2229,9 @@ void GemmMicrokernelTester::Test(

std::fill(packed_w.begin(), packed_w.end(), 0);
pack(/*g=*/1, n(), k(), nr(), kr(), sr(),
reinterpret_cast<const uint16_t*>(b.data()),
reinterpret_cast<const uint16_t*>(b.data()),
reinterpret_cast<const uint16_t*>(bias.data()), /*scale=*/nullptr,
reinterpret_cast<uint16_t*>(packed_w.data()),
reinterpret_cast<uint16_t*>(packed_w.data()),
/*extra_bytes=*/0, /*params=*/nullptr);

for (size_t m_index = 0; m_index < m(); m_index++) {
Expand Down Expand Up @@ -2292,7 +2292,7 @@ void GemmMicrokernelTester::Test(

xnnpack::ReplicableRandomDevice rng;
auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));

xnnpack::Buffer<xnn_float16> a((m() - 1) * a_stride() + k() + XNN_EXTRA_BYTES / sizeof(xnn_float16));
xnnpack::Buffer<xnn_float16> b(n() * k());
xnnpack::Buffer<xnn_float16, XNN_ALLOCATION_ALIGNMENT> packed_w(
Expand All @@ -2309,10 +2309,10 @@ void GemmMicrokernelTester::Test(

std::fill(packed_w.begin(), packed_w.end(), 0);
pack(/*g=*/1, n(), k(), nr(), kr(), sr(),
reinterpret_cast<const uint16_t*>(b.data()),
reinterpret_cast<const uint16_t*>(bias.data()),
/*scale=*/nullptr,
reinterpret_cast<uint16_t*>(packed_w.data()),
reinterpret_cast<const uint16_t*>(b.data()),
reinterpret_cast<const uint16_t*>(bias.data()),
/*scale=*/nullptr,
reinterpret_cast<uint16_t*>(packed_w.data()),
/*extra_bytes=*/0, /*params=*/nullptr);

for (size_t m_index = 0; m_index < m(); m_index++) {
Expand Down Expand Up @@ -2355,7 +2355,7 @@ void GemmMicrokernelTester::Test(
for (size_t j = 0; j < n(); j++) {
EXPECT_NEAR(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_ref[i * n() + j], std::max(1.0e-4f, std::abs(c_ref[i * n() + j]) * 1.0e-2f))
<< "at " << i << ", " << j << ": reference = " << c_ref[i * n() + j]
<< ", optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< ", optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< " x " << kr() << ", M x N x K = " << m() << " x " << n() << " x " << k();
}
}
Expand All @@ -2371,7 +2371,7 @@ void GemmMicrokernelTester::Test(

xnnpack::ReplicableRandomDevice rng;
auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));

xnnpack::Buffer<xnn_float16> a((mr() - 1) * a_stride() + k() + XNN_EXTRA_BYTES / sizeof(xnn_float16));
xnnpack::Buffer<xnn_float16> b(n() * ks() * k());
xnnpack::Buffer<xnn_float16, XNN_ALLOCATION_ALIGNMENT> packed_w(
Expand All @@ -2390,9 +2390,9 @@ void GemmMicrokernelTester::Test(

std::fill(packed_w.begin(), packed_w.end(), 0);
pack(/*g=*/1, n(), ks(), k(), nr(), kr(), sr(),
reinterpret_cast<const uint16_t*>(b.data()),
reinterpret_cast<const uint16_t*>(bias.data()), /*scale=*/nullptr,
reinterpret_cast<uint16_t*>(packed_w.data()),
reinterpret_cast<const uint16_t*>(b.data()),
reinterpret_cast<const uint16_t*>(bias.data()), /*scale=*/nullptr,
reinterpret_cast<uint16_t*>(packed_w.data()),
/*extra_bytes=*/0, /*params=*/nullptr);

for (size_t ks_index = 0; ks_index < ks(); ks_index++) {
Expand Down Expand Up @@ -2469,15 +2469,15 @@ void GemmMicrokernelTester::Test(
for (size_t j = 0; j < n(); j++) {
EXPECT_LE(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_max)
<< "at " << i << ", " << i << ": reference = " << c_ref[i * n() + j]
<< ", optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< ", optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< " x " << kr() << ", M x N x KC x KS = " << m() << " x " << n() << " x " << k() << " x " << ks();
EXPECT_GE(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_min)
<< "at " << i << ", " << i << ": reference = " << c_ref[i * n() + j]
<< ", optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< ", optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< " x " << kr() << ", M x N x KC x KS = " << m() << " x " << n() << " x " << k() << " x " << ks();
EXPECT_NEAR(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()], c_ref[i * n() + j], std::max(1.0e-4f, std::abs(c_ref[i * n() + j]) * 1.0e-2f))
<< "at " << i << ", " << i << ": reference = " << c_ref[i * n() + j]
<< ", optimized = " << c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< ", optimized = " << (float)c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()] << ", Mr x Nr x Kr = " << mr() << " x " << nr()
<< " x " << kr() << ", M x N x KC x KS = " << m() << " x " << n() << " x " << k() << " x " << ks();
}
}
Expand Down
2 changes: 1 addition & 1 deletion test/reduce-microkernel-tester.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class ReduceMicrokernelTester {
}

// Call optimized micro-kernel.
xnn_float16 output[2] = {std::nanf(""), std::nanf("")};
xnn_float16 output[2] = {(xnn_float16)std::nanf(""), (xnn_float16)std::nanf("")};
reduce(batch_size() * sizeof(xnn_float16), input.data(), output, init_params != nullptr ? &params : nullptr);

// Verify results.
Expand Down
4 changes: 2 additions & 2 deletions test/softmax-operator-tester.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,10 @@ class SoftMaxOperatorTester {
for (size_t i = 0; i < batch_size(); i++) {
float sum_exp = 0.0;
for (size_t c = 0; c < channels(); c++) {
sum_exp += std::exp(input[i * input_stride() + c]);
sum_exp += std::exp((float)input[i * input_stride() + c]);
}
for (size_t c = 0; c < channels(); c++) {
output_ref[i * channels() + c] = std::exp(input[i * input_stride() + c]) / sum_exp;
output_ref[i * channels() + c] = std::exp((float)input[i * input_stride() + c]) / sum_exp;
}
}

Expand Down
8 changes: 6 additions & 2 deletions test/static-constant-pad.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <cstddef> // For size_t.
#include <cstdint>
#include <memory> // For std::unique_ptr.
#include <numeric>
#include <random> // For std::uniform_real_distribution.
#include <vector> // For std::vector.

Expand Down Expand Up @@ -137,8 +138,11 @@ TEST_F(StaticConstantPadTestF16, define)
std::array<size_t, XNN_MAX_TENSOR_DIMS> post_paddings;
std::fill(pre_paddings.begin(), pre_paddings.begin() + dims.size(), dim_dist(rng));
std::fill(post_paddings.begin(), post_paddings.begin() + dims.size(), dim_dist(rng));
xnn_float16 padding_value = f32dist(rng);
uint32_t padding_value_as_bits = padding_value.value;
union {
xnn_float16 padding_value;
uint16_t padding_value_as_bits;
};
padding_value = f32dist(rng);

ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

Expand Down
2 changes: 1 addition & 1 deletion test/unary-operator-tester.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ class UnaryOperatorTester {
EXPECT_NEAR(y_ref, y, AbsTolF16(y_ref))
<< "at batch " << batch << " / " << batch_size() << ", channel "
<< channel << " / " << channels() << ", input "
<< input;
<< (float)input;
}
virtual void CheckResultQS8(int8_t y, float y_ref, size_t batch,
size_t channel, int8_t input) const {
Expand Down
17 changes: 15 additions & 2 deletions test/vbinary-microkernel-tester.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ class VBinaryMicrokernelTester {
result[i] = a[i] + b[i * stride_b];
break;
case OpType::CopySign:
result[i] = std::copysign(a[i], b[i * stride_b]);
result[i] = copy_sign(a[i], b[i * stride_b]);
break;
case OpType::RCopySign:
result[i] = std::copysign(b[i * stride_b], a[i]);
result[i] = copy_sign(b[i * stride_b], a[i]);
break;
case OpType::Div:
result[i] = a[i] / b[i * stride_b];
Expand Down Expand Up @@ -231,6 +231,19 @@ class VBinaryMicrokernelTester {
uint8_t qmin_{0};
uint8_t qmax_{255};
size_t iterations_{15};

static float copy_sign(float a, float b) {
return std::copysign(a, b);
}

static int32_t copy_sign(int32_t a, int32_t b) {
return (int32_t)std::copysign((float)a, (float)b);
}

  // Float16 copysign: widen both operands to float, apply std::copysign,
  // then narrow back. The round-trip is exact, since every fp16 value is
  // representable in fp32 and flipping the sign does not change magnitude.
  static xnn_float16 copy_sign(xnn_float16 a, xnn_float16 b) {
    return (xnn_float16)std::copysign((float)a, (float)b);
  }

};

#define XNN_TEST_BINARY_BATCH_EQ(ukernel, arch_flags, batch_tile, is_binaryc, \
Expand Down
6 changes: 3 additions & 3 deletions test/vcvt-microkernel-tester.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ void VCvtMicrokernelTester::Test(
ASSERT_EQ(float_as_uint32(output[i]),
float_as_uint32(input[i]))
<< "at " << i << " / " << batch_size() << ", x[" << i << "] = 0x"
<< std::hex << std::setw(4) << std::setfill('0') << input[i];
<< std::hex << std::setw(4) << std::setfill('0') << (float) input[i];
}
}
}
Expand Down Expand Up @@ -121,8 +121,8 @@ void VCvtMicrokernelTester::Test(xnn_f16_qs8_vcvt_ukernel_fn vcvt,
static_cast<int32_t>(output_ref[i]), 1)
<< "at " << i << " / " << batch_size() << ", x[" << i << "] = 0x"
<< std::hex << std::setw(8) << std::setfill('0')
<< float_as_uint32(input[i]) << " (" << input[i] << ")" << " INPUT "
<< input[i] << " scale " << scale() << " zp "
<< float_as_uint32(input[i]) << " (" << (float)input[i] << ")" << " INPUT "
<< (float)input[i] << " scale " << scale() << " zp "
<< (int)output_zero_point();
}
}
Expand Down
2 changes: 1 addition & 1 deletion test/vunary-microkernel-tester.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ class VUnaryMicrokernelTester {
for (size_t i = 0; i < batch_size(); i++) {
ASSERT_NEAR(y[i], y_ref[i], tol(y_ref[i]))
<< "at " << i << " / " << batch_size() << ", x[" << i
<< "] = " << std::scientific << x[i];
<< "] = " << std::scientific << (float)x[i];
}
}
}
Expand Down

0 comments on commit 9908073

Please sign in to comment.