From e50a59baa770cb30aa4457209997b149fd748955 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Sun, 13 Aug 2023 21:28:13 +0200 Subject: [PATCH] Require 'unsafe' keyword for custom implementations of nom traits `nom_locate` progressively advances through a fragment by slicing it, but expects to be able to go backward by as much as it advanced. This is normally fine, but custom implementations of `nom` types could cause UB by implementing slicing incorrectly. After this commit, they will need to implement the unsafe trait `RewindableFragment`, putting the burden of soundness on these implementations. stephaneyfx provides an example of a maliciously constructed fragment type exercising this behavior: > This function is called from public and safe functions like get_line_beginning. It assumes that the current fragment is part of a larger fragment and attempts to read before the beginning of the current fragment. This assumption may be incorrect as demonstrated by the following program that exhibits UB without unsafe and outputs garbage (which can change on every run). 
```rust use nom::{AsBytes, InputTake, Offset, Slice}; use nom_locate::LocatedSpan; use std::{ cell::Cell, ops::{RangeFrom, RangeTo}, rc::Rc, }; struct EvilInput<'a>(Rc<Cell<&'a [u8]>>); impl<'a> AsBytes for EvilInput<'a> { fn as_bytes(&self) -> &[u8] { self.0.get() } } impl Offset for EvilInput<'_> { fn offset(&self, second: &Self) -> usize { self.as_bytes().offset(second.as_bytes()) } } impl Slice<RangeFrom<usize>> for EvilInput<'_> { fn slice(&self, range: RangeFrom<usize>) -> Self { Self(Rc::new(Cell::new(self.0.get().slice(range)))) } } impl Slice<RangeTo<usize>> for EvilInput<'_> { fn slice(&self, range: RangeTo<usize>) -> Self { Self(Rc::new(Cell::new(self.0.get().slice(range)))) } } fn main() { let new_input = [32u8]; let original_input = [33u8; 3]; let evil_input = EvilInput(Rc::new(Cell::new(&original_input))); let span = LocatedSpan::new(evil_input).take_split(2).0; span.fragment().0.set(&new_input); let beginning = span.get_line_beginning(); dbg!(beginning); dbg!(new_input.as_ptr() as usize - beginning.as_ptr() as usize); } ``` Example output: ``` [src/main.rs:43] beginning = [ 201, 127, 32, ] [src/main.rs:44] new_input.as_ptr() as usize - beginning.as_ptr() as usize = 2 ``` --- src/lib.rs | 44 ++++++++++++++++++++++++++++++++++++-- tests/integration_tests.rs | 4 ++-- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 66a19f4..b2408be 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -118,6 +118,46 @@ use nom::{ #[cfg(feature = "stable-deref-trait")] use stable_deref_trait::StableDeref; +/// Trait of types whose implementation of [`AsBytes`], if any, returns slices that +/// can be dereferenced with a negative offset (which is usually not allowed in Rust). +/// +/// Because the safety of these implementations must be checked for every implementation, +/// `nom_locate` does not provide blanket implementations for any trait, but only for +/// concrete types. 
+/// +/// # Safety +/// +/// Implementations of `RewindableFragment` must uphold one invariant: each instance +/// `fragment` of the type has an `offset` property, and the fragment type satisfies +/// these assertions: +/// +/// * an instance's `offset` is constant for its lifetime (ie. it cannot change due +/// to interior mutability or global/external state) +/// * `offset` is nonnegative (ie. zero or greater; it is usually zero when passed +/// to [`LocatedSpan::new`]) +/// * if the type implements [`AsBytes`] then [`AsBytes::as_bytes`] must return a slice +/// whose underlying `*const u8` can be decremented by any number smaller or equal +/// to the `offset` and dereferenced safely. (ie. they are an offset in a larger +/// contiguous byte array) +/// * if the type implements [`Offset`], then the value returned by [`Offset::offset`] +/// must be equal to its `offset` (technically, they may safely return a value greater +/// than their `offset`, but it is unlikely to be correct, and may change in future +/// versions of `nom_locate`) +/// * if the type implements [`Slice`], then the new instance returned by [`Slice::slice`] +/// must have an `offset` equal to the original `offset` plus the `start` of the range +/// argument (ditto) +pub unsafe trait RewindableFragment {} + +unsafe impl RewindableFragment for [u8] {} +unsafe impl<'a> RewindableFragment for &'a [u8] {} +unsafe impl RewindableFragment for str {} +unsafe impl<'a> RewindableFragment for &'a str {} + +#[cfg(any(feature = "std", feature = "alloc"))] +unsafe impl RewindableFragment for Vec<u8> {} +#[cfg(any(feature = "std", feature = "alloc"))] +unsafe impl RewindableFragment for String {} + /// A LocatedSpan is a set of meta information about the location of a token, including extra /// information. /// @@ -323,7 +363,7 @@ impl LocatedSpan { } } -impl<T: AsBytes, X> LocatedSpan<T, X> { +impl<T: AsBytes + RewindableFragment, X> LocatedSpan<T, X> { // Attempt to get the "original" data slice back, by extending // self.fragment backwards by self.offset. 
// Note that any bytes truncated from after self.fragment will not @@ -660,7 +700,7 @@ macro_rules! impl_slice_ranges { impl<'a, T, R, X: Clone> Slice<R> for LocatedSpan<T, X> where - T: Slice<R> + Offset + AsBytes + Slice<RangeTo<usize>>, + T: Slice<R> + Offset + AsBytes + Slice<RangeTo<usize>> + RewindableFragment, { fn slice(&self, range: R) -> Self { let next_fragment = self.fragment.slice(range); diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 74dd0db..9137a97 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1,5 +1,5 @@ use nom::{error::ErrorKind, error_position, AsBytes, FindSubstring, IResult, InputLength, Slice}; -use nom_locate::LocatedSpan; +use nom_locate::{RewindableFragment, LocatedSpan}; use std::cmp; use std::fmt::Debug; use std::ops::{Range, RangeFull}; @@ -59,7 +59,7 @@ struct Position { fn test_str_fragments<'a, F, T>(parser: F, input: T, positions: Vec<Position>) where F: Fn(LocatedSpan<T>) -> IResult<LocatedSpan<T>, Vec<LocatedSpan<T>>>, - T: InputLength + Slice<Range<usize>> + Slice<RangeFull> + Debug + PartialEq + AsBytes, + T: InputLength + Slice<Range<usize>> + Slice<RangeFull> + Debug + PartialEq + AsBytes + RewindableFragment, { let res = parser(LocatedSpan::new(input.slice(..))) .map_err(|err| {