From 5b9bc157df5d115e513f094ae3ef8cfd3f8f3f48 Mon Sep 17 00:00:00 2001
From: Taiki Endo
Date: Tue, 1 Aug 2023 11:54:49 +0900
Subject: [PATCH] Optimize non-relaxed load/store on pre-v6 ARM Linux/Android

---
 bench/benches/bench.rs                 |   4 +
 bench/benches/imp/spinlock_fallback.rs |   6 +-
 build.rs                               |   9 +-
 src/imp/arm_linux.rs                   | 156 ++++++++++++++++++++++---
 src/imp/core_atomic.rs                 |  32 ++++-
 src/imp/fallback/seq_lock_wide.rs      |   3 +-
 src/imp/mod.rs                         |   2 -
 7 files changed, 183 insertions(+), 29 deletions(-)

diff --git a/bench/benches/bench.rs b/bench/benches/bench.rs
index 896cdb97..405b21a2 100644
--- a/bench/benches/bench.rs
+++ b/bench/benches/bench.rs
@@ -15,6 +15,7 @@ use criterion::{criterion_group, criterion_main, Criterion};
 #[allow(dead_code, unused_macros)]
 #[path = "../../src/utils.rs"]
 mod utils;
+
 #[allow(dead_code, unused_macros)]
 #[macro_use]
 #[path = "../../src/tests"]
@@ -43,6 +44,9 @@ mod arch;
 #[allow(dead_code, unused_imports)]
 #[path = "../../src/imp/arm_linux.rs"]
 mod arch;
+#[allow(dead_code, unused_imports)]
+#[path = "../../src/imp/mod.rs"]
+mod imp;
 #[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
 #[allow(dead_code, unused_imports)]
 #[path = "../../src/imp/atomic128/intrinsics.rs"]
 mod arch;
diff --git a/bench/benches/imp/spinlock_fallback.rs b/bench/benches/imp/spinlock_fallback.rs
index f017393c..b698aacb 100644
--- a/bench/benches/imp/spinlock_fallback.rs
+++ b/bench/benches/imp/spinlock_fallback.rs
@@ -7,12 +7,10 @@
 //
 // This module is currently only enabled on benchmark.
 
-use core::{
-    cell::UnsafeCell,
-    sync::atomic::{AtomicUsize, Ordering},
-};
+use core::{cell::UnsafeCell, sync::atomic::Ordering};
 
 use super::fallback::utils::{Backoff, CachePadded};
+use crate::imp::AtomicUsize;
 
 struct Spinlock {
     state: AtomicUsize,
diff --git a/build.rs b/build.rs
index 3cf86ef9..a5655471 100644
--- a/build.rs
+++ b/build.rs
@@ -260,12 +260,14 @@ fn main() {
                 }
             }
             target_feature_if("mclass", is_mclass, &version, None, true);
+            let mut v5te = known && subarch.starts_with("v5te");
             let v6 = known
                 && (subarch.starts_with("v6")
                     || subarch.starts_with("v7")
                     || subarch.starts_with("v8")
                     || subarch.starts_with("v9"));
-            target_feature_if("v6", v6, &version, None, true);
+            v5te |= target_feature_if("v6", v6, &version, None, true);
+            target_feature_if("v5te", v5te, &version, None, true);
         }
         "powerpc64" => {
             let target_endian =
@@ -335,7 +337,7 @@ fn target_feature_if(
     version: &Version,
     stabilized: Option<u32>,
     is_rustc_target_feature: bool,
-) {
+) -> bool {
     // HACK: Currently, it seems that the only way to handle unstable target
     // features on the stable is to parse the `-C target-feature` in RUSTFLAGS.
     //
@@ -350,7 +352,7 @@ fn target_feature_if(
         && (version.nightly || stabilized.map_or(false, |stabilized| version.minor >= stabilized))
     {
         // In this case, cfg(target_feature = "...") would work, so skip emitting our own target_feature cfg.
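+        // No portable_atomic_target_feature cfg is emitted in this case, so
+        // report `false` to the caller (main() uses the return value to derive
+        // v5te from v6).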
-        return;
+        return false;
     } else if let Some(rustflags) = env::var_os("CARGO_ENCODED_RUSTFLAGS") {
         for mut flag in rustflags.to_string_lossy().split('\x1f') {
             flag = strip_prefix(flag, "-C").unwrap_or(flag);
@@ -370,6 +372,7 @@ fn target_feature_if(
     if has_target_feature {
         println!("cargo:rustc-cfg=portable_atomic_target_feature=\"{}\"", name);
     }
+    has_target_feature
 }
 
 fn target_cpu() -> Option<String> {
diff --git a/src/imp/arm_linux.rs b/src/imp/arm_linux.rs
index 99621103..0a45243d 100644
--- a/src/imp/arm_linux.rs
+++ b/src/imp/arm_linux.rs
@@ -11,34 +11,29 @@
 // be possible to omit the dynamic kernel version check if the std feature is enabled on Rust 1.64+.
 // https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html
 
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 #[path = "fallback/outline_atomics.rs"]
 mod fallback;
 
 #[cfg(not(portable_atomic_no_asm))]
 use core::arch::asm;
-use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};
+use core::sync::atomic::Ordering;
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
+use core::{cell::UnsafeCell, mem};
 
-/// A 64-bit value represented as a pair of 32-bit values.
-///
-/// This type is `#[repr(C)]`, both fields have the same in-memory representation
-/// and are plain old datatypes, so access to the fields is always safe.
-#[derive(Clone, Copy)]
-#[repr(C)]
-union U64 {
-    whole: u64,
-    pair: Pair,
-}
-#[derive(Clone, Copy)]
-#[repr(C)]
-struct Pair {
-    lo: u32,
-    hi: u32,
-}
+use super::core_atomic::{
+    AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU32, AtomicU8,
+    AtomicUsize,
+};
 
 // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt
 const KUSER_HELPER_VERSION: usize = 0xFFFF0FFC;
+// __kuser_helper_version >= 3 (kernel version 2.6.15+)
+const KUSER_MEMORY_BARRIER: usize = 0xFFFF0FA0;
 // __kuser_helper_version >= 5 (kernel version 3.1+)
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 const KUSER_CMPXCHG64: usize = 0xFFFF0F60;
+
 #[inline]
 fn __kuser_helper_version() -> i32 {
     use core::sync::atomic::AtomicI32;
@@ -54,6 +49,123 @@ fn __kuser_helper_version() -> i32 {
     CACHE.store(v, Ordering::Relaxed);
     v
 }
+
+#[cfg(any(target_feature = "v5te", portable_atomic_target_feature = "v5te"))]
+macro_rules! blx {
+    ($addr:tt) => {
+        concat!("blx ", $addr)
+    };
+}
+#[cfg(not(any(target_feature = "v5te", portable_atomic_target_feature = "v5te")))]
+macro_rules! blx {
+    ($addr:tt) => {
+        concat!("mov lr, pc", "\n", "bx ", $addr)
+    };
+}
+
+macro_rules! atomic_load_store {
+    ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
+        impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            #[inline]
+            pub(crate) fn load(&self, order: Ordering) -> $value_type {
+                crate::utils::assert_load_ordering(order);
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    match order {
+                        Ordering::Relaxed => self.inner.load(Ordering::Relaxed),
+                        // Acquire and SeqCst loads are equivalent.
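+                        // (Both orderings lower to the same sequence here because the
+                        // only ordering primitive the kuser helpers provide is
+                        // __kuser_memory_barrier, a full memory barrier, executed
+                        // after the plain load.)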
+                        Ordering::Acquire | Ordering::SeqCst => {
+                            debug_assert!(__kuser_helper_version() >= 3);
+                            let src = self.as_ptr();
+                            let out;
+                            asm!(
+                                concat!("ldr", $asm_suffix, " {out}, [{src}]"),
+                                blx!("{kuser_memory_barrier}"),
+                                src = in(reg) src,
+                                out = lateout(reg) out,
+                                kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _,
+                                out("lr") _,
+                                options(nostack, preserves_flags),
+                            );
+                            out
+                        }
+                        _ => unreachable!("{:?}", order),
+                    }
+                }
+            }
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn store(&self, val: $value_type, order: Ordering) {
+                crate::utils::assert_store_ordering(order);
+                let dst = self.as_ptr();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    macro_rules! atomic_store_release {
+                        ($acquire:expr) => {{
+                            debug_assert!(__kuser_helper_version() >= 3);
+                            asm!(
+                                blx!("{kuser_memory_barrier}"),
+                                concat!("str", $asm_suffix, " {val}, [{dst}]"),
+                                $acquire,
+                                dst = in(reg) dst,
+                                val = in(reg) val,
+                                kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _,
+                                out("lr") _,
+                                options(nostack, preserves_flags),
+                            )
+                        }};
+                    }
+                    match order {
+                        Ordering::Relaxed => self.inner.store(val, Ordering::Relaxed),
+                        Ordering::Release => atomic_store_release!(""),
+                        Ordering::SeqCst => atomic_store_release!(blx!("{kuser_memory_barrier}")),
+                        _ => unreachable!("{:?}", order),
+                    }
+                }
+            }
+        }
+    };
+}
+
+atomic_load_store!(AtomicI8, i8, "b");
+atomic_load_store!(AtomicU8, u8, "b");
+atomic_load_store!(AtomicI16, i16, "h");
+atomic_load_store!(AtomicU16, u16, "h");
+atomic_load_store!(AtomicI32, i32, "");
+atomic_load_store!(AtomicU32, u32, "");
+atomic_load_store!(AtomicIsize, isize, "");
+atomic_load_store!(AtomicUsize, usize, "");
+atomic_load_store!([T] AtomicPtr, *mut T, "");
+
+/// A 64-bit value represented as a pair of 32-bit values.
+///
+/// This type is `#[repr(C)]`, both fields have the same in-memory representation
+/// and are plain old datatypes, so access to the fields is always safe.
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
+#[derive(Clone, Copy)]
+#[repr(C)]
+union U64 {
+    whole: u64,
+    pair: Pair,
+}
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
+#[derive(Clone, Copy)]
+#[repr(C)]
+struct Pair {
+    lo: u32,
+    hi: u32,
+}
+
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 #[inline]
 fn has_kuser_cmpxchg64() -> bool {
     // Note: detect_false cfg is intended to make it easy for portable-atomic developers to
@@ -64,6 +176,7 @@ fn has_kuser_cmpxchg64() -> bool {
     }
     __kuser_helper_version() >= 5
 }
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 #[inline]
 unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut u64) -> bool {
     // SAFETY: the caller must uphold the safety contract.
@@ -75,6 +188,7 @@ unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut
 }
 
 // 64-bit atomic load by two 32-bit atomic loads.
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 #[inline]
 unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
     // SAFETY: the caller must uphold the safety contract.
@@ -92,6 +206,7 @@ unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
     }
 }
 
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 #[inline(always)]
 unsafe fn atomic_update_kuser_cmpxchg64<F>(dst: *mut u64, mut f: F) -> u64
 where
@@ -123,6 +238,7 @@ macro_rules! atomic_with_ifunc {
         unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)? { $($kuser_cmpxchg64_fn_body:tt)* }
         fallback = $seqcst_fallback_fn:ident
     ) => {
+        #[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
         #[inline]
         unsafe fn $name($($arg)*) $(-> $ret_ty)? {
             unsafe fn kuser_cmpxchg64_fn($($arg)*) $(-> $ret_ty)? {
@@ -265,6 +381,7 @@ atomic_with_ifunc! {
     fallback = atomic_neg_seqcst
 }
 
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 macro_rules! atomic64 {
     ($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => {
         #[repr(C, align(8))]
@@ -454,7 +571,9 @@ macro_rules! atomic64 {
     };
 }
 
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 atomic64!(AtomicI64, i64, atomic_max, atomic_min);
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 atomic64!(AtomicU64, u64, atomic_umax, atomic_umin);
 
 #[allow(
@@ -475,10 +594,13 @@ mod tests {
         assert_eq!(version, unsafe { (KUSER_HELPER_VERSION as *const i32).read() });
     }
 
+    #[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
     test_atomic_int!(i64);
+    #[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
     test_atomic_int!(u64);
     // load/store/swap implementation is not affected by signedness, so it is
     // enough to test only unsigned types.
+    #[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
     stress_test!(u64);
 }
diff --git a/src/imp/core_atomic.rs b/src/imp/core_atomic.rs
index 72da6fa2..7b88dc17 100644
--- a/src/imp/core_atomic.rs
+++ b/src/imp/core_atomic.rs
@@ -20,7 +20,7 @@ unsafe impl Sync for NoRefUnwindSafe {}
 
 #[repr(transparent)]
 pub(crate) struct AtomicPtr<T> {
-    inner: core::sync::atomic::AtomicPtr<T>,
+    pub(crate) inner: core::sync::atomic::AtomicPtr<T>,
     // Prevent RefUnwindSafe from being propagated from the std atomic type.
     _marker: PhantomData<NoRefUnwindSafe>,
 }
@@ -45,6 +45,13 @@ impl<T> AtomicPtr<T> {
     pub(crate) fn into_inner(self) -> *mut T {
         self.inner.into_inner()
     }
+    #[cfg(not(all(
+        not(any(miri, portable_atomic_sanitize_thread)),
+        any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+        target_arch = "arm",
+        any(target_os = "linux", target_os = "android"),
+        not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+    )))]
     #[inline]
     #[cfg_attr(
         any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
         track_caller
     )]
@@ -54,6 +61,13 @@ impl<T> AtomicPtr<T> {
         crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
         self.inner.load(order)
     }
+    #[cfg(not(all(
+        not(any(miri, portable_atomic_sanitize_thread)),
+        any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+        target_arch = "arm",
+        any(target_os = "linux", target_os = "android"),
+        not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+    )))]
     #[inline]
     #[cfg_attr(
         any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
         track_caller
     )]
@@ -125,7 +139,7 @@ macro_rules! atomic_int {
     ($atomic_type:ident, $int_type:ident) => {
         #[repr(transparent)]
         pub(crate) struct $atomic_type {
-            inner: core::sync::atomic::$atomic_type,
+            pub(crate) inner: core::sync::atomic::$atomic_type,
             // Prevent RefUnwindSafe from being propagated from the std atomic type.
             _marker: PhantomData<NoRefUnwindSafe>,
         }
@@ -167,6 +181,13 @@ macro_rules! atomic_int {
             pub(crate) fn into_inner(self) -> $int_type {
                 self.inner.into_inner()
             }
+            #[cfg(not(all(
+                not(any(miri, portable_atomic_sanitize_thread)),
+                any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+                target_arch = "arm",
+                any(target_os = "linux", target_os = "android"),
+                not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+            )))]
             #[inline]
             #[cfg_attr(
                 any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
                 track_caller
             )]
@@ -176,6 +197,13 @@ macro_rules! atomic_int {
                 crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
                 self.inner.load(order)
             }
+            #[cfg(not(all(
+                not(any(miri, portable_atomic_sanitize_thread)),
+                any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+                target_arch = "arm",
+                any(target_os = "linux", target_os = "android"),
+                not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+            )))]
             #[inline]
             #[cfg_attr(
                 any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
                 track_caller
             )]
diff --git a/src/imp/fallback/seq_lock_wide.rs b/src/imp/fallback/seq_lock_wide.rs
index 74b08d24..b2f52800 100644
--- a/src/imp/fallback/seq_lock_wide.rs
+++ b/src/imp/fallback/seq_lock_wide.rs
@@ -2,10 +2,11 @@
 use core::{
     mem::ManuallyDrop,
-    sync::atomic::{self, AtomicUsize, Ordering},
+    sync::atomic::{self, Ordering},
 };
 
 use super::utils::Backoff;
+use crate::imp::AtomicUsize;
 
 // See mod.rs for details.
 pub(super) type AtomicChunk = AtomicUsize;
diff --git a/src/imp/mod.rs b/src/imp/mod.rs
index 3dbe8e6c..1b92cfa5 100644
--- a/src/imp/mod.rs
+++ b/src/imp/mod.rs
@@ -101,14 +101,12 @@ mod powerpc64;
 mod s390x;
 
 // Miri and Sanitizer do not support inline assembly.
-#[cfg(feature = "fallback")]
 #[cfg(all(
     not(any(miri, portable_atomic_sanitize_thread)),
     any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
     target_arch = "arm",
     any(target_os = "linux", target_os = "android"),
     not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
-    not(portable_atomic_no_outline_atomics),
 ))]
 #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))]
 #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))]
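
The kuser-helper pattern used above can also be sketched as standalone code. The following is a minimal illustration, not part of the patch, assuming a pre-v6 ARM Linux/Android target where the kernel maps the helper page and the kuser helpers are enabled; the two addresses and the `lr` clobber mirror the constants and asm! operands in src/imp/arm_linux.rs, while the function names (kuser_memory_barrier, load_acquire, store_release) are illustrative only:

    use core::arch::asm;
    use core::sync::atomic::{AtomicU32, Ordering};

    // Addresses in the kernel vector page (see kernel_user_helpers.txt).
    const KUSER_HELPER_VERSION: usize = 0xFFFF_0FFC;
    const KUSER_MEMORY_BARRIER: usize = 0xFFFF_0FA0;

    // Number of kuser helpers exported by the running kernel.
    #[inline]
    pub fn kuser_helper_version() -> i32 {
        // The kernel initializes this word before userspace starts, so a plain read is fine.
        unsafe { (KUSER_HELPER_VERSION as *const i32).read() }
    }

    // Full memory barrier via __kuser_memory_barrier (requires version >= 3, i.e. kernel 2.6.15+).
    #[inline]
    pub fn kuser_memory_barrier() {
        assert!(kuser_helper_version() >= 3);
        unsafe {
            asm!(
                // On ARMv5TE+ this could be a single `blx`; the two-instruction
                // `mov lr, pc; bx` form used here matches the non-v5te branch of
                // the blx! macro in the patch and also works on plain ARMv5.
                "mov lr, pc",
                "bx {helper}",
                helper = in(reg) KUSER_MEMORY_BARRIER,
                out("lr") _,
                options(nostack, preserves_flags),
            );
        }
    }

    // Shape of the Acquire/SeqCst load generated by the patch: plain load, then a full barrier.
    #[inline]
    pub fn load_acquire(a: &AtomicU32) -> u32 {
        let v = a.load(Ordering::Relaxed);
        kuser_memory_barrier();
        v
    }

    // Shape of the Release store generated by the patch: full barrier, then plain store.
    #[inline]
    pub fn store_release(a: &AtomicU32, val: u32) {
        kuser_memory_barrier();
        a.store(val, Ordering::Relaxed);
    }

A SeqCst store adds a second barrier after the plain store, which is what the atomic_store_release! macro above emits when invoked with an extra blx!("{kuser_memory_barrier}") argument.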