From b6677ef95823e07c7014b5c3dfac1dc8cb3e9337 Mon Sep 17 00:00:00 2001 From: Techcable Date: Wed, 17 Jun 2020 12:24:10 -0700 Subject: [PATCH] [simple] Used relaxed atomic for should_collect checks at a safepoint This should be easier for a JIT to implement since it doesn't require any fences. See issue #12 As of this writing, cranelift doesn't even support atomic fences. It's also could be faster on ARM architectures. Seems important since it's such a hot-path..... --- libs/simple/src/context.rs | 2 +- libs/simple/src/lib.rs | 25 ++++++++++++++++--------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/libs/simple/src/context.rs b/libs/simple/src/context.rs index 628e0bd..aab1e73 100644 --- a/libs/simple/src/context.rs +++ b/libs/simple/src/context.rs @@ -294,7 +294,7 @@ unsafe impl GcContext for SimpleCollectorContext { debug_assert_eq!(self.raw.state.get(), ContextState::Active); let dyn_ptr = (*self.raw.shadow_stack.get()) .push(value); - if self.raw.collector.should_collect() { + if self.raw.collector.should_collect_relaxed() { self.raw.trigger_safepoint(); } debug_assert_eq!(self.raw.state.get(), ContextState::Active); diff --git a/libs/simple/src/lib.rs b/libs/simple/src/lib.rs index 0cc6bfe..6dffa0d 100644 --- a/libs/simple/src/lib.rs +++ b/libs/simple/src/lib.rs @@ -258,15 +258,20 @@ impl GcHeap { } } #[inline] - fn should_collect(&self) -> bool { + fn should_collect_relaxed(&self) -> bool { /* * Going with relaxed ordering because it's not essential * that we see updates immediately. * Eventual consistency should be enough to eventually * trigger a collection. + * + * This is much cheaper on ARM (since it avoids a fence) + * and is much easier to use with a JIT. + * A JIT will insert these very often, so it's important to make + * them fast! */ - self.allocator.allocated_size.load(Ordering::Acquire) - >= self.threshold.load(Ordering::Acquire) + self.allocator.allocated_size.load(Ordering::Relaxed) + >= self.threshold.load(Ordering::Relaxed) } } @@ -520,14 +525,16 @@ struct RawSimpleCollector { } impl RawSimpleCollector { #[inline] - fn should_collect(&self) -> bool { + fn should_collect_relaxed(&self) -> bool { /* - * TODO: Consider relaxed ordering - * It'd be cheaper on ARM but potentially - * delay collection..... + * Use relaxed ordering, just like `should_collect` + * + * This is faster on ARM since it avoids a memory fence. + * More importantly this is easier for a JIT to implement inline!!! + * As of this writing cranelift doesn't even seem to support fences :o */ - self.heap.should_collect() || self.collecting - .load(Ordering::Acquire) + self.heap.should_collect_relaxed() || self.collecting + .load(Ordering::Relaxed) } #[cold] #[inline(never)]