Skip to content

Commit

Permalink
Require 'unsafe' keyword for custom implementations of nom traits
Browse files Browse the repository at this point in the history
`nom_locate` progressively advances through a fragment by slicing it,
but expects to be able to go backward by as much as it advanced. This is
normally fine, but custom implementations of `nom` types could cause
UB by implementing slicing incorrectly.

After this commit, they will need to implement the unsafe trait
`RewindableFragment`, putting the burden of soundness on these
implementations.

stephaneyfx provides an example of a maliciously constructed fragment type
exercising this behavior:

> This function is called from public and safe functions like get_line_beginning. It assumes that the current fragment is part of a larger fragment and attempts to read before the beginning of the current fragment. This assumption may be incorrect as demonstrated by the following program that exhibits UB without unsafe and outputs garbage (which can change on every run).

```rust
use nom::{AsBytes, InputTake, Offset, Slice};
use nom_locate::LocatedSpan;
use std::{
    cell::Cell,
    ops::{RangeFrom, RangeTo},
    rc::Rc,
};

/// Fragment type whose byte slice is stored in a shared `Cell`, so the slice it
/// exposes can be replaced through a shared reference (as `main` does below).
struct EvilInput<'a>(Rc<Cell<&'a [u8]>>);

impl<'a> AsBytes for EvilInput<'a> {
    /// Returns whichever slice is currently stored in the cell; the result can
    /// therefore differ between calls if the cell has been overwritten.
    fn as_bytes(&self) -> &[u8] {
        self.0.get()
    }
}

impl Offset for EvilInput<'_> {
    /// Delegates to the `Offset` implementation of the byte slices currently
    /// held by `self` and `second`.
    fn offset(&self, second: &Self) -> usize {
        self.as_bytes().offset(second.as_bytes())
    }
}

impl Slice<RangeFrom<usize>> for EvilInput<'_> {
    /// Slices the bytes currently held in the cell and wraps the result in a
    /// fresh `Rc<Cell<_>>`, so the returned value has its own cell.
    fn slice(&self, range: RangeFrom<usize>) -> Self {
        Self(Rc::new(Cell::new(self.0.get().slice(range))))
    }
}

impl Slice<RangeTo<usize>> for EvilInput<'_> {
    /// Slices the bytes currently held in the cell and wraps the result in a
    /// fresh `Rc<Cell<_>>`, so the returned value has its own cell.
    fn slice(&self, range: RangeTo<usize>) -> Self {
        Self(Rc::new(Cell::new(self.0.get().slice(range))))
    }
}

/// Reproduces the unsoundness without any `unsafe` in user code: build a span
/// over one buffer, then swap the fragment's backing slice before asking the
/// span for the beginning of its line.
fn main() {
    let new_input = [32u8];
    let original_input = [33u8; 3];
    let evil_input = EvilInput(Rc::new(Cell::new(&original_input)));
    // Keep element `.0` of the split at index 2 -- presumably the remainder, so
    // the span has advanced two bytes into `original_input` (confirm against
    // nom's `InputTake::take_split`).
    let span = LocatedSpan::new(evil_input).take_split(2).0;
    // Replace the bytes behind the span's fragment with an unrelated one-byte array.
    span.fragment().0.set(&new_input);
    // The span still believes it can rewind by the distance it advanced, but the
    // fragment now points into `new_input`, so this reads before that array.
    let beginning = span.get_line_beginning();
    dbg!(beginning);
    // Number of bytes by which the returned slice starts before `new_input`.
    dbg!(new_input.as_ptr() as usize - beginning.as_ptr() as usize);
}
```

Example output:

```
[src/main.rs:43] beginning = [
    201,
    127,
    32,
]
[src/main.rs:44] new_input.as_ptr() as usize - beginning.as_ptr() as usize = 2
```
  • Loading branch information
progval committed Aug 13, 2023
1 parent 73d2a24 commit e50a59b
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 4 deletions.
44 changes: 42 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,46 @@ use nom::{
#[cfg(feature = "stable-deref-trait")]
use stable_deref_trait::StableDeref;

/// Trait of types whose implementation of [`AsBytes`], if any, returns slices that
/// can be dereferenced with a negative offset (which is usually not allowed in Rust).
///
/// Because the safety of these implementations must be checked for every implementation,
/// `nom_locate` does not provide blanket implementations for any trait, but only for
/// concrete types.
///
/// # Safety
///
/// Implementations of `RewindableFragment` must uphold one invariant: each instance
/// `fragment` of the type has an `offset` property, and the fragment type satisfies
/// these assertions:
///
/// * an instance's `offset` is constant for its lifetime (i.e. it cannot change due
/// to interior mutability or global/external state)
/// * `offset` is nonnegative (i.e. zero or greater; it is usually zero when passed
/// to [`LocatedSpan::new`])
/// * if the type implements [`AsBytes`] then [`AsBytes::as_bytes`] must return a slice
/// whose underlying `*const u8` can be decremented by any number smaller than or equal
/// to the `offset` and dereferenced safely (i.e. the slice is located at that offset
/// in a larger contiguous byte array)
/// * if the type implements [`Offset`], then the value returned by [`Offset::offset`]
/// must be equal to its `offset` (technically, they may safely return a value greater
/// than their `offset`, but it is unlikely to be correct, and may change in future
/// versions of `nom_locate`)
/// * if the type implements [`Slice`], then the new instance returned by [`Slice::slice`]
/// must have an `offset` equal to the original `offset` plus the `start` of the range
/// argument (the same caveat applies: a greater value is technically safe, but is
/// unlikely to be correct and may change in future versions of `nom_locate`)
pub unsafe trait RewindableFragment {}

// Implemented for concrete fragment types only (no blanket impls), so that each
// implementation can be checked individually against the trait's safety contract.
unsafe impl RewindableFragment for [u8] {}
unsafe impl<'a> RewindableFragment for &'a [u8] {}
unsafe impl RewindableFragment for str {}
unsafe impl<'a> RewindableFragment for &'a str {}

// Owned buffers are only covered when the `std` or `alloc` feature is enabled.
#[cfg(any(feature = "std", feature = "alloc"))]
unsafe impl RewindableFragment for Vec<u8> {}
#[cfg(any(feature = "std", feature = "alloc"))]
unsafe impl RewindableFragment for String {}

/// A LocatedSpan is a set of meta information about the location of a token, including extra
/// information.
///
Expand Down Expand Up @@ -323,7 +363,7 @@ impl<T, X> LocatedSpan<T, X> {
}
}

impl<T: AsBytes, X> LocatedSpan<T, X> {
impl<T: AsBytes + RewindableFragment, X> LocatedSpan<T, X> {
// Attempt to get the "original" data slice back, by extending
// self.fragment backwards by self.offset.
// Note that any bytes truncated from after self.fragment will not
Expand Down Expand Up @@ -660,7 +700,7 @@ macro_rules! impl_slice_ranges {

impl<'a, T, R, X: Clone> Slice<R> for LocatedSpan<T, X>
where
T: Slice<R> + Offset + AsBytes + Slice<RangeTo<usize>>,
T: Slice<R> + Offset + AsBytes + Slice<RangeTo<usize>> + RewindableFragment,
{
fn slice(&self, range: R) -> Self {
let next_fragment = self.fragment.slice(range);
Expand Down
4 changes: 2 additions & 2 deletions tests/integration_tests.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use nom::{error::ErrorKind, error_position, AsBytes, FindSubstring, IResult, InputLength, Slice};
use nom_locate::LocatedSpan;
use nom_locate::{RewindableFragment, LocatedSpan};
use std::cmp;
use std::fmt::Debug;
use std::ops::{Range, RangeFull};
Expand Down Expand Up @@ -59,7 +59,7 @@ struct Position {
fn test_str_fragments<'a, F, T>(parser: F, input: T, positions: Vec<Position>)
where
F: Fn(LocatedSpan<T>) -> IResult<LocatedSpan<T>, Vec<LocatedSpan<T>>>,
T: InputLength + Slice<Range<usize>> + Slice<RangeFull> + Debug + PartialEq + AsBytes,
T: InputLength + Slice<Range<usize>> + Slice<RangeFull> + Debug + PartialEq + AsBytes + RewindableFragment,
{
let res = parser(LocatedSpan::new(input.slice(..)))
.map_err(|err| {
Expand Down

0 comments on commit e50a59b

Please sign in to comment.