diff --git a/.github/.cspell/project-dictionary.txt b/.github/.cspell/project-dictionary.txt index c9f55c96..4a397a5f 100644 --- a/.github/.cspell/project-dictionary.txt +++ b/.github/.cspell/project-dictionary.txt @@ -4,6 +4,7 @@ alcgr algr allnoconfig aosp +aqrl armasm armreg Auxinfo @@ -79,9 +80,11 @@ lqarx lrcpc lwsync machdep +maxu mfence mgba midr +minu mipsn miscompiles mmfr @@ -125,8 +128,10 @@ simavr skiboot slbgr slgr +sllw spinlock sreg +srlw sstatus stdarch stdbool diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6248b61e..41d8ed78 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -446,6 +446,7 @@ jobs: matrix: rust: - '1.64' + - '1.72' - stable - nightly runs-on: ubuntu-latest diff --git a/Cargo.toml b/Cargo.toml index 7db4c2f1..a7cc0998 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,9 +58,16 @@ require-cas = [] # https://github.com/taiki-e/portable-atomic#optional-features-unsafe-assume-single-core unsafe-assume-single-core = [] +# The following are sub-features of the unsafe-assume-single-core feature and if enabled without +# the unsafe-assume-single-core feature will result in a compile error. +# There is no explicit "unsafe-" prefix because the user has already opted in to "unsafe" by +# enabling the unsafe-assume-single-core feature, but misuse of these features is also usually +# considered unsound. + # For RISC-V targets, generate code for S mode to disable interrupts. s-mode = [] - +# For RISC-V targets, use AMO instructions even if A-extension is disabled. +force-amo = [] # For ARM targets, also disable FIQs when disabling interrupts. disable-fiq = [] diff --git a/build.rs b/build.rs index a9f9f007..4eee0fa5 100644 --- a/build.rs +++ b/build.rs @@ -22,6 +22,8 @@ fn main() { println!("cargo:rustc-cfg=portable_atomic_unsafe_assume_single_core"); #[cfg(feature = "s-mode")] println!("cargo:rustc-cfg=portable_atomic_s_mode"); + #[cfg(feature = "force-amo")] + println!("cargo:rustc-cfg=portable_atomic_force_amo"); #[cfg(feature = "disable-fiq")] println!("cargo:rustc-cfg=portable_atomic_disable_fiq"); diff --git a/src/imp/interrupt/README.md b/src/imp/interrupt/README.md index 32c202a8..463d8cf0 100644 --- a/src/imp/interrupt/README.md +++ b/src/imp/interrupt/README.md @@ -17,10 +17,11 @@ For some targets, the implementation can be changed by explicitly enabling featu - On pre-v6 ARM with the `disable-fiq` feature, this disables interrupts by modifying the I (IRQ mask) bit and F (FIQ mask) bit of the CPSR. - On RISC-V (without A-extension), this disables interrupts by modifying the MIE (Machine Interrupt Enable) bit of the `mstatus` register. - On RISC-V (without A-extension) with the `s-mode` feature, this disables interrupts by modifying the SIE (Supervisor Interrupt Enable) bit of the `sstatus` register. +- On RISC-V (without A-extension) with the `force-amo` feature, this uses AMO instructions for RMWs that have corresponding AMO instructions even if A-extension is disabled. For other RMWs, this disables interrupts as usual. - On MSP430, this disables interrupts by modifying the GIE (Global Interrupt Enable) bit of the status register (SR). - On AVR, this disables interrupts by modifying the I (Global Interrupt Enable) bit of the status register (SREG). - On Xtensa, this disables interrupts by modifying the PS special register. -Some operations don't require disabling interrupts (loads and stores on targets except for AVR, but additionally on MSP430 `add`, `sub`, `and`, `or`, `xor`, `not`). 
However, when the `critical-section` feature is enabled, critical sections are taken for all atomic operations. +Some operations don't require disabling interrupts (loads and stores on targets except for AVR, but additionally on MSP430 `add,sub,and,or,xor,not`, on RISC-V with the `force-amo` feature `swap,fetch_{add,sub,and,or,xor,not,max,min},add,sub,and,or,xor,not`). However, when the `critical-section` feature is enabled, critical sections are taken for all atomic operations. Feel free to submit an issue if your target is not supported yet. diff --git a/src/imp/interrupt/mod.rs b/src/imp/interrupt/mod.rs index de98712e..ab5fb0a9 100644 --- a/src/imp/interrupt/mod.rs +++ b/src/imp/interrupt/mod.rs @@ -175,7 +175,13 @@ impl AtomicPtr { } #[inline] - pub(crate) fn swap(&self, ptr: *mut T, _order: Ordering) -> *mut T { + pub(crate) fn swap(&self, ptr: *mut T, order: Ordering) -> *mut T { + let _ = order; + #[cfg(portable_atomic_force_amo)] + { + self.as_native().swap(ptr, order) + } + #[cfg(not(portable_atomic_force_amo))] // SAFETY: any data races are prevented by disabling interrupts (see // module-level comments) and the raw pointer is valid because we got it // from a reference. @@ -277,9 +283,12 @@ macro_rules! atomic_int { } } }; - (load_store_atomic, $atomic_type:ident, $int_type:ident, $align:literal) => { + (load_store_atomic $([$kind:ident])?, $atomic_type:ident, $int_type:ident, $align:literal) => { atomic_int!(base, $atomic_type, $int_type, $align); - atomic_int!(cas, $atomic_type, $int_type); + #[cfg(not(portable_atomic_force_amo))] + atomic_int!(cas[emulate], $atomic_type, $int_type); + #[cfg(portable_atomic_force_amo)] + atomic_int!(cas $([$kind])?, $atomic_type, $int_type); impl $atomic_type { #[inline] #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] @@ -360,7 +369,7 @@ macro_rules! atomic_int { }; (load_store_critical_session, $atomic_type:ident, $int_type:ident, $align:literal) => { atomic_int!(base, $atomic_type, $int_type, $align); - atomic_int!(cas, $atomic_type, $int_type); + atomic_int!(cas[emulate], $atomic_type, $int_type); impl_default_no_fetch_ops!($atomic_type, $int_type); impl_default_bit_opts!($atomic_type, $int_type); impl $atomic_type { @@ -390,7 +399,7 @@ macro_rules! atomic_int { } } }; - (cas, $atomic_type:ident, $int_type:ident) => { + (cas[emulate], $atomic_type:ident, $int_type:ident) => { impl $atomic_type { #[inline] pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type { @@ -544,6 +553,256 @@ macro_rules! atomic_int { }) } + #[inline] + pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. 
+ with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(prev.wrapping_neg()); + prev + }) + } + #[inline] + pub(crate) fn neg(&self, order: Ordering) { + self.fetch_neg(order); + } + } + }; + // cfg(portable_atomic_force_amo) 32-bit(RV32)/{32,64}-bit(RV64) RMW + (cas, $atomic_type:ident, $int_type:ident) => { + impl $atomic_type { + #[inline] + pub(crate) fn swap(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().swap(val, order) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + if prev == current { + self.v.get().write(new); + Ok(prev) + } else { + Err(prev) + } + }) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange_weak( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + self.compare_exchange(current, new, success, failure) + } + + #[inline] + pub(crate) fn fetch_add(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_add(val, order) + } + #[inline] + pub(crate) fn fetch_sub(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_sub(val, order) + } + #[inline] + pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_and(val, order) + } + + #[inline] + pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(!(prev & val)); + prev + }) + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_or(val, order) + } + #[inline] + pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_xor(val, order) + } + #[inline] + pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_max(val, order) + } + #[inline] + pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_min(val, order) + } + #[inline] + pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type { + self.as_native().fetch_not(order) + } + + #[inline] + pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. 
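+                // (Note: the A extension has no amoneg instruction, so fetch_neg, like
+                // fetch_nand above, keeps taking the interrupt-disabling path even when
+                // force-amo is enabled.)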
+ with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(prev.wrapping_neg()); + prev + }) + } + #[inline] + pub(crate) fn neg(&self, order: Ordering) { + self.fetch_neg(order); + } + } + }; + // cfg(portable_atomic_force_amo) {8,16}-bit RMW + (cas[sub_word], $atomic_type:ident, $int_type:ident) => { + impl $atomic_type { + #[inline] + pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { self.v.get().replace(val) }) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + if prev == current { + self.v.get().write(new); + Ok(prev) + } else { + Err(prev) + } + }) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange_weak( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + self.compare_exchange(current, new, success, failure) + } + + #[inline] + pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(prev.wrapping_add(val)); + prev + }) + } + + #[inline] + pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(prev.wrapping_sub(val)); + prev + }) + } + + #[inline] + pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_and(val, order) + } + + #[inline] + pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(!(prev & val)); + prev + }) + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_or(val, order) + } + #[inline] + pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_xor(val, order) + } + + #[inline] + pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. 
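+                // (8-bit and 16-bit max/min have no single-AMO mapping, since amomax/amomin
+                // compare whole words, so they stay on the interrupt-disabling path even
+                // when force-amo is enabled.)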
+ with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(core::cmp::max(prev, val)); + prev + }) + } + + #[inline] + pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(core::cmp::min(prev, val)); + prev + }) + } + + #[inline] + pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type { + self.as_native().fetch_not(order) + } + #[inline] pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { // SAFETY: any data races are prevented by disabling interrupts (see @@ -580,10 +839,10 @@ atomic_int!(load_store_atomic, AtomicIsize, isize, 16); #[cfg(target_pointer_width = "128")] atomic_int!(load_store_atomic, AtomicUsize, usize, 16); -atomic_int!(load_store_atomic, AtomicI8, i8, 1); -atomic_int!(load_store_atomic, AtomicU8, u8, 1); -atomic_int!(load_store_atomic, AtomicI16, i16, 2); -atomic_int!(load_store_atomic, AtomicU16, u16, 2); +atomic_int!(load_store_atomic[sub_word], AtomicI8, i8, 1); +atomic_int!(load_store_atomic[sub_word], AtomicU8, u8, 1); +atomic_int!(load_store_atomic[sub_word], AtomicI16, i16, 2); +atomic_int!(load_store_atomic[sub_word], AtomicU16, u16, 2); #[cfg(not(target_pointer_width = "16"))] atomic_int!(load_store_atomic, AtomicI32, i32, 4); diff --git a/src/imp/riscv.rs b/src/imp/riscv.rs index 547eac26..1252f9a9 100644 --- a/src/imp/riscv.rs +++ b/src/imp/riscv.rs @@ -10,13 +10,75 @@ // - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit // // Generated asm: -// - riscv64gc https://godbolt.org/z/hx4Krb91h +// - riscv64gc https://godbolt.org/z/bn7G3j1vn #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; use core::{cell::UnsafeCell, sync::atomic::Ordering}; -macro_rules! atomic { +#[cfg(any(test, portable_atomic_force_amo))] +macro_rules! atomic_rmw_amo_order { + ($op:ident, $order:ident) => { + match $order { + Ordering::Relaxed => $op!(""), + Ordering::Acquire => $op!(".aq"), + Ordering::Release => $op!(".rl"), + // AcqRel and SeqCst RMWs are equivalent. + Ordering::AcqRel | Ordering::SeqCst => $op!(".aqrl"), + _ => unreachable!("{:?}", $order), + } + }; +} +#[cfg(any(test, portable_atomic_force_amo))] +macro_rules! atomic_rmw_amo { + ($op:ident, $dst:ident, $val:ident, $order:ident, $asm_suffix:tt) => {{ + let out; + macro_rules! 
op { + ($asm_order:tt) => { + asm!( + ".option push", + ".option arch, +a", + concat!("amo", stringify!($op), ".", $asm_suffix, $asm_order, " {out}, {val}, 0({dst})"), + ".option pop", + dst = in(reg) ptr_reg!($dst), + val = in(reg) $val, + out = lateout(reg) out, + options(nostack, /* TODO: preserves_flags */), + ) + }; + } + atomic_rmw_amo_order!(op, $order); + out + }}; +} +// val.wrapping_shl(shift) but no extra `& (u32::BITS - 1)` +#[cfg(any(test, portable_atomic_force_amo))] +#[inline] +fn sll(val: u32, shift: u32) -> u32 { + unsafe { + let out; + #[cfg(target_arch = "riscv32")] + asm!("sll {out}, {val}, {shift}", out = lateout(reg) out, val = in(reg) val, shift = in(reg) shift, options(pure, nomem, nostack, preserves_flags)); + #[cfg(target_arch = "riscv64")] + asm!("sllw {out}, {val}, {shift}", out = lateout(reg) out, val = in(reg) val, shift = in(reg) shift, options(pure, nomem, nostack, preserves_flags)); + out + } +} +// val.wrapping_shr(shift) but no extra `& (u32::BITS - 1)` +#[cfg(any(test, portable_atomic_force_amo))] +#[inline] +fn srl(val: u32, shift: u32) -> u32 { + unsafe { + let out; + #[cfg(target_arch = "riscv32")] + asm!("srl {out}, {val}, {shift}", out = lateout(reg) out, val = in(reg) val, shift = in(reg) shift, options(pure, nomem, nostack, preserves_flags)); + #[cfg(target_arch = "riscv64")] + asm!("srlw {out}, {val}, {shift}", out = lateout(reg) out, val = in(reg) val, shift = in(reg) shift, options(pure, nomem, nostack, preserves_flags)); + out + } +} + +macro_rules! atomic_load_store { ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => { #[repr(transparent)] pub(crate) struct $atomic_type $(<$($generics)*>)? { @@ -141,28 +203,169 @@ macro_rules! atomic { }; } -atomic!(AtomicI8, i8, "b"); -atomic!(AtomicU8, u8, "b"); -atomic!(AtomicI16, i16, "h"); -atomic!(AtomicU16, u16, "h"); -atomic!(AtomicI32, i32, "w"); -atomic!(AtomicU32, u32, "w"); +macro_rules! atomic_ptr { + ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => { + atomic_load_store!($([$($generics)*])? $atomic_type, $value_type, $asm_suffix); + #[cfg(portable_atomic_force_amo)] + impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? { + #[inline] + pub(crate) fn swap(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!(swap, dst, val, order, $asm_suffix) } + } + } + }; +} + +macro_rules! atomic { + ($atomic_type:ident, $value_type:ty, $asm_suffix:tt, $max:tt, $min:tt) => { + atomic_load_store!($atomic_type, $value_type, $asm_suffix); + // There is no amo{sub,nand,neg}. + #[cfg(any(test, portable_atomic_force_amo))] + impl $atomic_type { + #[inline] + pub(crate) fn swap(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!(swap, dst, val, order, $asm_suffix) } + } + + #[inline] + pub(crate) fn fetch_add(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. 
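+                // Expansion sketch (illustrative, assuming the atomic_rmw_amo! and
+                // atomic_rmw_amo_order! macros defined above): with $asm_suffix = "w" and
+                // order = SeqCst this emits, roughly,
+                //     .option push
+                //     .option arch, +a
+                //     amoadd.w.aqrl {out}, {val}, 0({dst})
+                //     .option pop
+                // i.e. a single AMO instruction carrying both acquire and release semantics.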
+ unsafe { atomic_rmw_amo!(add, dst, val, order, $asm_suffix) } + } + + #[inline] + pub(crate) fn fetch_sub(&self, val: $value_type, order: Ordering) -> $value_type { + self.fetch_add(val.wrapping_neg(), order) + } + + #[inline] + pub(crate) fn fetch_and(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!(and, dst, val, order, $asm_suffix) } + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!(or, dst, val, order, $asm_suffix) } + } + + #[inline] + pub(crate) fn fetch_xor(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!(xor, dst, val, order, $asm_suffix) } + } + + #[inline] + pub(crate) fn fetch_not(&self, order: Ordering) -> $value_type { + const NOT_MASK: $value_type = (0 as $value_type).wrapping_sub(1); + self.fetch_xor(NOT_MASK, order) + } + + #[inline] + pub(crate) fn fetch_max(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!($max, dst, val, order, $asm_suffix) } + } + + #[inline] + pub(crate) fn fetch_min(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!($min, dst, val, order, $asm_suffix) } + } + } + }; +} + +macro_rules! atomic_sub_word { + ($atomic_type:ident, $value_type:ty, $asm_suffix:tt) => { + atomic_load_store!($atomic_type, $value_type, $asm_suffix); + #[cfg(any(test, portable_atomic_force_amo))] + impl $atomic_type { + #[inline] + pub(crate) fn fetch_and(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + let (dst, shift, mask) = crate::utils::create_sub_word_mask_values(dst); + let mask = !sll(mask as u32, shift as u32); + let val = sll(val as u32, shift as u32); + let val = val | mask; + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + let out: u32 = unsafe { atomic_rmw_amo!(and, dst, val, order, "w") }; + srl(out, shift as u32) as $value_type + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + let (dst, shift, _mask) = crate::utils::create_sub_word_mask_values(dst); + let val = sll(val as u32, shift as u32); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. 
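+                // (Unlike fetch_and above, no mask is needed here: after the shift, every bit
+                // of `val` outside the target sub-word is zero, and OR-ing zeros leaves the
+                // neighboring bytes of the containing word unchanged. The same reasoning
+                // applies to fetch_xor below.)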
+ let out: u32 = unsafe { atomic_rmw_amo!(or, dst, val, order, "w") }; + srl(out, shift as u32) as $value_type + } + + #[inline] + pub(crate) fn fetch_xor(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + let (dst, shift, _mask) = crate::utils::create_sub_word_mask_values(dst); + let val = sll(val as u32, shift as u32); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + let out: u32 = unsafe { atomic_rmw_amo!(xor, dst, val, order, "w") }; + srl(out, shift as u32) as $value_type + } + + #[inline] + pub(crate) fn fetch_not(&self, order: Ordering) -> $value_type { + const NOT_MASK: $value_type = (0 as $value_type).wrapping_sub(1); + self.fetch_xor(NOT_MASK, order) + } + } + }; +} + +atomic_sub_word!(AtomicI8, i8, "b"); +atomic_sub_word!(AtomicU8, u8, "b"); +atomic_sub_word!(AtomicI16, i16, "h"); +atomic_sub_word!(AtomicU16, u16, "h"); +atomic!(AtomicI32, i32, "w", max, min); +atomic!(AtomicU32, u32, "w", maxu, minu); #[cfg(target_arch = "riscv64")] -atomic!(AtomicI64, i64, "d"); +atomic!(AtomicI64, i64, "d", max, min); #[cfg(target_arch = "riscv64")] -atomic!(AtomicU64, u64, "d"); +atomic!(AtomicU64, u64, "d", maxu, minu); #[cfg(target_pointer_width = "32")] -atomic!(AtomicIsize, isize, "w"); +atomic!(AtomicIsize, isize, "w", max, min); #[cfg(target_pointer_width = "32")] -atomic!(AtomicUsize, usize, "w"); +atomic!(AtomicUsize, usize, "w", maxu, minu); #[cfg(target_pointer_width = "32")] -atomic!([T] AtomicPtr, *mut T, "w"); +atomic_ptr!([T] AtomicPtr, *mut T, "w"); #[cfg(target_pointer_width = "64")] -atomic!(AtomicIsize, isize, "d"); +atomic!(AtomicIsize, isize, "d", max, min); #[cfg(target_pointer_width = "64")] -atomic!(AtomicUsize, usize, "d"); +atomic!(AtomicUsize, usize, "d", maxu, minu); #[cfg(target_pointer_width = "64")] -atomic!([T] AtomicPtr, *mut T, "d"); +atomic_ptr!([T] AtomicPtr, *mut T, "d"); #[cfg(test)] mod tests { @@ -181,4 +384,195 @@ mod tests { test_atomic_int_load_store!(u64); test_atomic_int_load_store!(isize); test_atomic_int_load_store!(usize); + + macro_rules! test_atomic_int_amo { + ($int_type:ident) => { + paste::paste! { + #[allow( + clippy::alloc_instead_of_core, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + clippy::undocumented_unsafe_blocks + )] + mod [] { + use super::*; + test_atomic_int_amo!([], $int_type); + } + } + }; + ($atomic_type:ty, $int_type:ident) => { + ::quickcheck::quickcheck! 
{ + fn quickcheck_swap(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.swap(y, order), x); + assert_eq!(a.swap(x, order), y); + } + true + } + fn quickcheck_fetch_add(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_add(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x.wrapping_add(y)); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_add(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y.wrapping_add(x)); + } + true + } + fn quickcheck_fetch_sub(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_sub(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x.wrapping_sub(y)); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_sub(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y.wrapping_sub(x)); + } + true + } + fn quickcheck_fetch_and(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_and(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x & y); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_and(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y & x); + } + true + } + fn quickcheck_fetch_or(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_or(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x | y); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_or(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y | x); + } + true + } + fn quickcheck_fetch_xor(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_xor(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x ^ y); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_xor(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y ^ x); + } + true + } + fn quickcheck_fetch_max(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_max(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), core::cmp::max(x, y)); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_max(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), core::cmp::max(y, x)); + } + true + } + fn quickcheck_fetch_min(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_min(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), core::cmp::min(x, y)); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_min(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), core::cmp::min(y, x)); + } + true + } + fn quickcheck_fetch_not(x: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_not(order), x); + assert_eq!(a.load(Ordering::Relaxed), !x); + assert_eq!(a.fetch_not(order), !x); + assert_eq!(a.load(Ordering::Relaxed), x); + } + true + } + } + }; + } + macro_rules! test_atomic_int_amo_sub_word { + ($int_type:ident) => { + paste::paste! 
{ + #[allow( + clippy::alloc_instead_of_core, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + clippy::undocumented_unsafe_blocks + )] + mod [] { + use super::*; + test_atomic_int_amo_sub_word!([], $int_type); + } + } + }; + ($atomic_type:ty, $int_type:ident) => { + ::quickcheck::quickcheck! { + fn quickcheck_fetch_and(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_and(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x & y); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_and(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y & x); + } + true + } + fn quickcheck_fetch_or(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_or(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x | y); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_or(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y | x); + } + true + } + fn quickcheck_fetch_xor(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_xor(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x ^ y); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_xor(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y ^ x); + } + true + } + fn quickcheck_fetch_not(x: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_not(order), x); + assert_eq!(a.load(Ordering::Relaxed), !x); + assert_eq!(a.fetch_not(order), !x); + assert_eq!(a.load(Ordering::Relaxed), x); + } + true + } + } + }; + } + test_atomic_int_amo_sub_word!(i8); + test_atomic_int_amo_sub_word!(u8); + test_atomic_int_amo_sub_word!(i16); + test_atomic_int_amo_sub_word!(u16); + test_atomic_int_amo!(i32); + test_atomic_int_amo!(u32); + #[cfg(target_arch = "riscv64")] + test_atomic_int_amo!(i64); + #[cfg(target_arch = "riscv64")] + test_atomic_int_amo!(u64); + test_atomic_int_amo!(isize); + test_atomic_int_amo!(usize); } diff --git a/src/lib.rs b/src/lib.rs index b923db8e..8d467946 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -406,6 +406,9 @@ compile_error!("cfg(portable_atomic_disable_fiq) does not compatible with this t #[cfg(portable_atomic_s_mode)] #[cfg(not(any(target_arch = "riscv32", target_arch = "riscv64")))] compile_error!("cfg(portable_atomic_s_mode) does not compatible with this target"); +#[cfg(portable_atomic_force_amo)] +#[cfg(not(any(target_arch = "riscv32", target_arch = "riscv64")))] +compile_error!("cfg(portable_atomic_force_amo) does not compatible with this target"); #[cfg(portable_atomic_disable_fiq)] #[cfg(not(portable_atomic_unsafe_assume_single_core))] @@ -417,6 +420,11 @@ compile_error!( compile_error!( "cfg(portable_atomic_s_mode) may only be used together with cfg(portable_atomic_unsafe_assume_single_core)" ); +#[cfg(portable_atomic_force_amo)] +#[cfg(not(portable_atomic_unsafe_assume_single_core))] +compile_error!( + "cfg(portable_atomic_force_amo) may only be used together with cfg(portable_atomic_unsafe_assume_single_core)" +); #[cfg(all(portable_atomic_unsafe_assume_single_core, feature = "critical-section"))] compile_error!( diff --git a/src/utils.rs b/src/utils.rs index ded53852..42a12744 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -813,10 +813,53 @@ pub(crate) struct Pair { pub(crate) lo: T, } +#[allow(dead_code)] +type MinWord = u32; +#[cfg(target_arch = "riscv32")] 
+type RegSize = u32; +#[cfg(target_arch = "riscv64")] +type RegSize = u64; +// Adapted from https://github.com/taiki-e/atomic-maybe-uninit/blob/v0.3.0/src/utils.rs#L210. +// Helper for implementing sub-word atomic operations using word-sized LL/SC loop or CAS loop. +// +// Refs: https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/CodeGen/AtomicExpandPass.cpp#L699 +// (aligned_ptr, shift, mask) +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +#[allow(dead_code)] +#[inline] +pub(crate) fn create_sub_word_mask_values(ptr: *mut T) -> (*mut MinWord, RegSize, RegSize) { + use core::mem; + const SHIFT_MASK: bool = !cfg!(any( + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "loongarch64", + target_arch = "s390x", + )); + let ptr_mask = mem::size_of::() - 1; + let aligned_ptr = strict::with_addr(ptr, ptr as usize & !ptr_mask).cast::(); + let ptr_lsb = if SHIFT_MASK { + ptr as usize & ptr_mask + } else { + // We use 32-bit wrapping shift instructions in asm on these platforms. + ptr as usize + }; + let shift = if cfg!(any(target_endian = "little", target_arch = "s390x")) { + ptr_lsb.wrapping_mul(8) + } else { + (ptr_lsb ^ (mem::size_of::() - mem::size_of::())).wrapping_mul(8) + }; + let mut mask: RegSize = (1 << (mem::size_of::() * 8)) - 1; // !(0 as T) as RegSize + if SHIFT_MASK { + mask <<= shift; + } + (aligned_ptr, shift as RegSize, mask) +} + /// Emulate strict provenance. /// /// Once strict_provenance is stable, migrate to the standard library's APIs. -#[cfg(miri)] +#[cfg(any(miri, target_arch = "riscv32", target_arch = "riscv64"))] +#[allow(dead_code)] #[allow(clippy::cast_possible_wrap)] pub(crate) mod strict { /// Replace the address portion of this pointer with a new address. diff --git a/tools/build.sh b/tools/build.sh index 69cc9a7b..725002fc 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -125,7 +125,7 @@ known_cfgs=( # - env.TEST_FEATURES in .github/workflows/ci.yml. # - test_features list in tools/test.sh. 
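For reference, a worked example of the values that `create_sub_word_mask_values` (added above in `src/utils.rs`) yields for a 16-bit access. The function below is an illustrative re-derivation for `T = u16` on little-endian RISC-V, not the crate's API, and the address 0x8000_0006 is arbitrary; note that the real helper leaves the shift amount unmasked because the `sllw`/`srlw` callers in `src/imp/riscv.rs` only read its low 5 bits.

```rust
fn sub_word_params_u16(addr: usize) -> (usize, u32, u32) {
    let ptr_mask = core::mem::size_of::<u32>() - 1; // the containing word is 32-bit
    let aligned = addr & !ptr_mask;
    // The real helper leaves the shift unmasked on RISC-V; sllw/srlw mask it in
    // hardware to the low 5 bits, which is what `& 31` mimics here.
    let shift = (addr.wrapping_mul(8) & 31) as u32;
    let mask = (1u32 << (core::mem::size_of::<u16>() * 8)) - 1; // 0xFFFF, left unshifted
    (aligned, shift, mask)
}

fn main() {
    // A u16 at address 0x8000_0006 lives in bits 16..32 of the aligned u32 at 0x8000_0004.
    assert_eq!(sub_word_params_u16(0x8000_0006), (0x8000_0004, 16, 0xFFFF));
}
```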
test_features="float,std,serde,critical-section" -exclude_features="unsafe-assume-single-core,s-mode,disable-fiq" +exclude_features="unsafe-assume-single-core,s-mode,force-amo,disable-fiq" x() { local cmd="$1" @@ -441,6 +441,12 @@ build() { CARGO_TARGET_DIR="${target_dir}/api-test-assume-single-core-s-mode" \ RUSTFLAGS="${target_rustflags} --cfg portable_atomic_unsafe_assume_single_core --cfg portable_atomic_s_mode" \ x_cargo "${args[@]}" --feature-powerset --manifest-path tests/api-test/Cargo.toml "$@" + # .option arch requires 1.72 + if [[ "${rustc_minor_version}" -ge 72 ]]; then + CARGO_TARGET_DIR="${target_dir}/api-test-assume-single-core-force-amo" \ + RUSTFLAGS="${target_rustflags} --cfg portable_atomic_unsafe_assume_single_core --cfg portable_atomic_force_amo" \ + x_cargo "${args[@]}" --feature-powerset --manifest-path tests/api-test/Cargo.toml "$@" + fi ;; esac ;; @@ -481,6 +487,12 @@ build() { CARGO_TARGET_DIR="${target_dir}/assume-single-core-s-mode" \ RUSTFLAGS="${target_rustflags} --cfg portable_atomic_unsafe_assume_single_core --cfg portable_atomic_s_mode" \ x_cargo "${args[@]}" --exclude-features "critical-section" "$@" + # .option arch requires 1.72 + if [[ "${rustc_minor_version}" -ge 72 ]]; then + CARGO_TARGET_DIR="${target_dir}/assume-single-core-force-amo" \ + RUSTFLAGS="${target_rustflags} --cfg portable_atomic_unsafe_assume_single_core --cfg portable_atomic_force_amo" \ + x_cargo "${args[@]}" --exclude-features "critical-section" "$@" + fi ;; esac ;; diff --git a/tools/no-std.sh b/tools/no-std.sh index 6f6d7f2c..3c7d8db0 100755 --- a/tools/no-std.sh +++ b/tools/no-std.sh @@ -88,6 +88,8 @@ if [[ -z "${is_custom_toolchain}" ]]; then fi rustc_target_list=$(rustc ${pre_args[@]+"${pre_args[@]}"} --print target-list) rustc_version=$(rustc ${pre_args[@]+"${pre_args[@]}"} -Vv | grep 'release: ' | sed 's/release: //') +rustc_minor_version="${rustc_version#*.}" +rustc_minor_version="${rustc_minor_version%%.*}" metadata=$(cargo metadata --format-version=1 --no-deps) target_dir=$(jq <<<"${metadata}" -r '.target_directory') nightly='' @@ -196,6 +198,17 @@ run() { RUSTFLAGS="${target_rustflags} --cfg portable_atomic_disable_fiq" \ x_cargo "${args[@]}" --release "$@" ;; + riscv??i-* | riscv??im-* | riscv??imc-*) + # .option arch requires 1.72 + if [[ "${rustc_minor_version}" -ge 72 ]]; then + CARGO_TARGET_DIR="${target_dir}/no-std-test-force-amo" \ + RUSTFLAGS="${target_rustflags} --cfg portable_atomic_force_amo" \ + x_cargo "${args[@]}" "$@" + CARGO_TARGET_DIR="${target_dir}/no-std-test-force-amo" \ + RUSTFLAGS="${target_rustflags} --cfg portable_atomic_force_amo" \ + x_cargo "${args[@]}" --release "$@" + fi + ;; esac ) }
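As a closing note on the RMW mappings introduced in `src/imp/riscv.rs`: they rely on a couple of scalar identities (there is no `amosub` or `amonot`) and on picking signed vs. unsigned AMO mnemonics for max/min. A standalone sanity check in plain Rust (not part of the patch) of those facts:

```rust
fn main() {
    let (x, v): (u32, u32) = (0x1234_5678, 0x9ABC_DEF0);
    // fetch_sub is lowered to fetch_add of the two's-complement negation (no amosub):
    assert_eq!(x.wrapping_sub(v), x.wrapping_add(v.wrapping_neg()));
    // fetch_not is lowered to fetch_xor with an all-ones mask (no amonot either):
    assert_eq!(!x, x ^ u32::MAX);
    // max/min need different mnemonics for signed and unsigned types (amomax.w vs
    // amomaxu.w): the same bit pattern orders differently under the two comparisons.
    let a: i32 = -1; // bit pattern 0xFFFF_FFFF
    let b: i32 = 1;
    assert_eq!(a.max(b), 1); // signed: 1 > -1
    assert_eq!((a as u32).max(b as u32), u32::MAX); // unsigned: 0xFFFF_FFFF > 1
}
```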