Skip to content

Commit

Permalink
refactor(parser): make Source::set_position safe (oxc-project#2341)
Browse files Browse the repository at this point in the history
Make `Source::set_position` a safe function.

This addresses a shortcoming of oxc-project#2288.

Instead of requiring caller of `Source::set_position` to guarantee that the `SourcePosition` is created from this `Source`, the preceding PRs enforce this guarantee at the type level.

`Source::set_position` is going to be a central API for transitioning the lexer to processing the source as bytes, rather than `char`s (and the anticipated speed-ups that will produce). So making this method safe will remove the need for a *lot* of unsafe code blocks, and boilerplate comments promising "SAFETY: There's only one `Source`", when to the developer, this is blindingly obvious anyway.

So, while splitting the parser into `Parser` and `ParserImpl` (oxc-project#2339) is an annoying change to have to make, I believe the benefit of this PR justifies it.
  • Loading branch information
overlookmotel authored and IWANABETHATGUY committed May 29, 2024
1 parent a74e1f8 commit 2baa31f
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 28 deletions.
4 changes: 1 addition & 3 deletions crates/oxc_parser/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,7 @@ impl<'a> ParserImpl<'a> {
let ParserCheckpoint { lexer, cur_token, prev_span_end, errors_pos: errors_lens } =
checkpoint;

// SAFETY: Parser only ever creates a single `Lexer`,
// therefore all checkpoints must be created from it.
unsafe { self.lexer.rewind(lexer) };
self.lexer.rewind(lexer);
self.token = cur_token;
self.prev_token_end = prev_span_end;
self.errors.truncate(errors_lens);
Expand Down
21 changes: 4 additions & 17 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,8 @@ impl<'a> Lexer<'a> {
}

/// Rewinds the lexer to the same state as when the passed in `checkpoint` was created.
///
/// # SAFETY
/// `checkpoint` must have been created from this `Lexer`.
#[allow(clippy::missing_safety_doc)] // Clippy is wrong!
pub unsafe fn rewind(&mut self, checkpoint: LexerCheckpoint<'a>) {
pub fn rewind(&mut self, checkpoint: LexerCheckpoint<'a>) {
self.errors.truncate(checkpoint.errors_pos);
// SAFETY: Caller guarantees `checkpoint` was created from this `Lexer`,
// and therefore `checkpoint.position` was created from `self.source`.
self.source.set_position(checkpoint.position);
self.token = checkpoint.token;
self.lookahead.clear();
Expand All @@ -178,10 +172,7 @@ impl<'a> Lexer<'a> {
let position = self.source.position();

if let Some(lookahead) = self.lookahead.back() {
// SAFETY: `self.lookahead` only contains lookaheads created by this `Lexer`.
// `self.source` never changes, so `lookahead.position` must have been created
// from `self.source`.
unsafe { self.source.set_position(lookahead.position) };
self.source.set_position(lookahead.position);
}

for _i in self.lookahead.len()..n {
Expand All @@ -197,8 +188,7 @@ impl<'a> Lexer<'a> {
// read, so that's not possible. So no need to restore `self.token` here.
// It's already in same state as it was at start of this function.

// SAFETY: `position` was created above from `self.source`. `self.source` never changes.
unsafe { self.source.set_position(position) };
self.source.set_position(position);

self.lookahead[n - 1].token
}
Expand All @@ -211,10 +201,7 @@ impl<'a> Lexer<'a> {
/// Main entry point
pub fn next_token(&mut self) -> Token {
if let Some(lookahead) = self.lookahead.pop_front() {
// SAFETY: `self.lookahead` only contains lookaheads created by this `Lexer`.
// `self.source` never changes, so `lookahead.position` must have been created
// from `self.source`.
unsafe { self.source.set_position(lookahead.position) };
self.source.set_position(lookahead.position);
return lookahead.token;
}
let kind = self.read_next_token();
Expand Down
20 changes: 12 additions & 8 deletions crates/oxc_parser/src/lexer/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,17 @@ impl<'a> Source<'a> {
}

/// Move current position.
///
/// # SAFETY
/// `pos` must be created from this `Source`, not another `Source`.
/// If this is the case, the invariants of `Source` are guaranteed to be upheld.
#[inline]
pub(super) unsafe fn set_position(&mut self, pos: SourcePosition) {
// `SourcePosition` always upholds the invariants of `Source`,
// as long as it's created from this `Source`.
pub(super) fn set_position(&mut self, pos: SourcePosition) {
// `SourcePosition` always upholds the invariants of `Source`, as long as it's created
// from this `Source`. `SourcePosition`s can only be created from a `Source`.
// `Source::new` takes a `UniquePromise`, which guarantees that it's the only `Source`
// in existence on this thread. `Source` is not `Sync` or `Send`, so no possibility another
// `Source` originated on another thread can "jump" onto this one.
// This is sufficient to guarantee that any `SourcePosition` that parser/lexer holds must be
// from this `Source`.
// This guarantee is what allows this function to be safe.

// SAFETY: `read_u8`'s contract is upheld by:
// * The preceding checks that `pos.ptr` >= `self.start` and < `self.end`.
// * `Source`'s invariants guarantee that `self.start` - `self.end` contains allocated memory.
Expand All @@ -157,7 +160,8 @@ impl<'a> Source<'a> {
debug_assert!(
pos.ptr >= self.start
&& pos.ptr <= self.end
&& (pos.ptr == self.end || !is_utf8_cont_byte(read_u8(pos.ptr)))
// SAFETY: See above
&& (pos.ptr == self.end || !is_utf8_cont_byte(unsafe { read_u8(pos.ptr) }))
);
self.ptr = pos.ptr;
}
Expand Down

0 comments on commit 2baa31f

Please sign in to comment.