diff --git a/README.md b/README.md index 0a730b26..c7199b91 100644 --- a/README.md +++ b/README.md @@ -154,7 +154,7 @@ RUSTFLAGS="--cfg portable_atomic_unsafe_assume_single_core" cargo ... If dynamic dispatching by run-time CPU feature detection is enabled, it allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE (aarch64). Note: - - Dynamic detection is currently only enabled in Rust 1.61+ for aarch64, in Rust 1.59+ (AVX) or 1.69+ (CMPXCHG16B) for x86_64, otherwise it works the same as when this cfg is set. + - Dynamic detection is currently only enabled in Rust 1.59+ for aarch64, in Rust 1.59+ (AVX) or 1.69+ (CMPXCHG16B) for x86_64, otherwise it works the same as when this cfg is set. - If the required target features are enabled at compile-time, the atomic operations are inlined. - This is compatible with no-std (as with all features except `std`). - Some aarch64 targets enable LLVM's `outline-atomics` target feature by default, so if you set this cfg, you may want to disable that as well. (portable-atomic's outline-atomics does not depend on the compiler-rt symbols, so even if you need to disable LLVM's outline-atomics, you may not need to disable portable-atomic's outline-atomics.) 
diff --git a/build.rs b/build.rs index 59fc06ed..1fa50eef 100644 --- a/build.rs +++ b/build.rs @@ -176,20 +176,6 @@ fn main() { target_feature_if("cmpxchg16b", has_cmpxchg16b, &version, Some(69), true); } "aarch64" => { - // aarch64_target_feature stabilized in Rust 1.61 (nightly-2022-03-16): https://github.com/rust-lang/rust/pull/90621 - if !version.probe(61, 2022, 3, 15) { - if version.nightly && is_allowed_feature("aarch64_target_feature") { - // The part of this feature we use has not been changed since 1.27 - // (https://github.com/rust-lang/rust/commit/1217d70465edb2079880347fea4baaac56895f51) - // until it was stabilized in nightly-2022-03-16, so it can be safely enabled in - // nightly, which is older than nightly-2022-03-16. - println!("cargo:rustc-cfg=portable_atomic_unstable_aarch64_target_feature"); - } else { - // On aarch64, when aarch64_target_feature is not available, outline-atomics is also not available. - println!("cargo:rustc-cfg=portable_atomic_no_outline_atomics"); - } - } - // aarch64 macos always support FEAT_LSE and FEAT_LSE2 because it is armv8.5-a: // https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/include/llvm/TargetParser/AArch64TargetParser.h#L458 let is_macos = target_os == "macos"; diff --git a/src/imp/atomic128/aarch64.rs b/src/imp/atomic128/aarch64.rs index 6c55587a..4808d4ca 100644 --- a/src/imp/atomic128/aarch64.rs +++ b/src/imp/atomic128/aarch64.rs @@ -143,6 +143,30 @@ macro_rules! debug_assert_lse { }; } +// https://developer.arm.com/documentation/dui0774/l/armclang-Integrated-Assembler/AArch64-Target-selection-directives?lang=en +#[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] +#[cfg(any( + target_feature = "lse", + portable_atomic_target_feature = "lse", + not(portable_atomic_no_outline_atomics), +))] +macro_rules! 
start_lse { + () => { + ".arch armv8-a+lse" + }; +} +#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] +#[cfg(any( + target_feature = "lse", + portable_atomic_target_feature = "lse", + not(portable_atomic_no_outline_atomics), +))] +macro_rules! start_lse { + () => { + "" + }; +} + #[cfg(target_pointer_width = "32")] macro_rules! ptr_modifier { () => { @@ -462,7 +486,8 @@ unsafe fn atomic_compare_exchange( #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] let res = { fn_alias! { - #[target_feature(enable = "lse")] + // inline(never) is not strictly necessary, but is used for clarity. + #[inline(never)] unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128; atomic_compare_exchange_casp_relaxed = _atomic_compare_exchange_casp(Ordering::Relaxed); @@ -570,10 +595,11 @@ unsafe fn atomic_compare_exchange( portable_atomic_target_feature = "lse", not(portable_atomic_no_outline_atomics), ))] -#[cfg_attr( - not(any(target_feature = "lse", portable_atomic_target_feature = "lse")), - target_feature(enable = "lse") -)] +// Note: If FEAT_LSE is not available at compile-time, we must guarantee that the +// eventual caller is not inlined into a function where it is not clear whether +// FEAT_LSE is available. Otherwise, optimizations that reorder its instructions +// across the if condition might introduce undefined behavior. However, our code +// uses the ifunc helper macro, so we usually don't have to worry about this. #[inline] unsafe fn _atomic_compare_exchange_casp( dst: *mut u128, @@ -598,6 +624,7 @@ unsafe fn _atomic_compare_exchange_casp( macro_rules! cmpxchg { ($acquire:tt, $release:tt, $fence:tt) => { asm!( + start_lse!(), concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst", ptr_modifier!(), "}]"), $fence, dst = in(reg) dst, diff --git a/src/lib.rs b/src/lib.rs index 4852f06a..3dfbee99 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -146,7 +146,7 @@ RUSTFLAGS="--cfg portable_atomic_unsafe_assume_single_core" cargo ... 
If dynamic dispatching by run-time CPU feature detection is enabled, it allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE (aarch64). Note: - - Dynamic detection is currently only enabled in Rust 1.61+ for aarch64, in Rust 1.59+ (AVX) or 1.69+ (CMPXCHG16B) for x86_64, otherwise it works the same as when this cfg is set. + - Dynamic detection is currently only enabled in Rust 1.59+ for aarch64, in Rust 1.59+ (AVX) or 1.69+ (CMPXCHG16B) for x86_64, otherwise it works the same as when this cfg is set. - If the required target features are enabled at compile-time, the atomic operations are inlined. - This is compatible with no-std (as with all features except `std`). - Some aarch64 targets enable LLVM's `outline-atomics` target feature by default, so if you set this cfg, you may want to disable that as well. (portable-atomic's outline-atomics does not depend on the compiler-rt symbols, so even if you need to disable LLVM's outline-atomics, you may not need to disable portable-atomic's outline-atomics.) @@ -247,20 +247,11 @@ RUSTFLAGS="--cfg portable_atomic_unsafe_assume_single_core" cargo ... // These features are already stabilized or have already been removed from compilers, // and can safely be enabled for old nightly as long as version detection works. // - cfg(target_has_atomic) -// - #[target_feature(enable = "lse")] on AArch64 // - #[target_feature(enable = "cmpxchg16b")] on x86_64 // - asm! on ARM, AArch64, RISC-V, x86_64 // - llvm_asm! on AVR (tier 3) and MSP430 (tier 3) // - #[instruction_set] on non-Linux pre-v6 ARM (tier 3) #![cfg_attr(portable_atomic_unstable_cfg_target_has_atomic, feature(cfg_target_has_atomic))] -#![cfg_attr( - all( - target_arch = "aarch64", - portable_atomic_unstable_aarch64_target_feature, - not(portable_atomic_no_outline_atomics), - ), - feature(aarch64_target_feature) -)] #![cfg_attr( all( target_arch = "x86_64",