parser(refactor): promise only one Source on a thread at a time (oxc-project#2340)

Introduce invariant that only a single `lexer::Source` can exist on a thread at one time.

This is a preparatory step for oxc-project#2341.

2 notes:

The restriction is only 1 `ParserImpl` / `Lexer` / `Source` per *thread* at a time, not globally, so it does not prevent parsing multiple files simultaneously on different threads.

The restriction does not apply to the public type `Parser`, only to `ParserImpl`. `ParserImpl`s are not created in `Parser::new`, but in `Parser::parse`, where they are created and then immediately consumed. So the end user is also free to create multiple `Parser` instances on the same thread (if they want to for some reason).
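For illustration, here is a minimal, self-contained sketch of the "promise token" idea this commit applies. The names `parse_entry` and `Inner` are hypothetical stand-ins, not the actual oxc types; the real implementation lives in the `parser_parse` module of `crates/oxc_parser/src/lib.rs` and is shown in the diff below.

```rust
// Minimal sketch of the "promise token" pattern (hypothetical names
// `parse_entry` and `Inner`; the real code uses `pub(crate)`/`pub(super)`
// visibility across `parser_parse`, `ParserImpl`, `Lexer` and `Source`).
mod parse_entry {
    /// Zero-sized token. Its only constructor is private to this module,
    /// so no code outside `parse_entry` can ever obtain one.
    pub struct UniquePromise {
        _dummy: (),
    }

    impl UniquePromise {
        fn new() -> Self {
            Self { _dummy: () }
        }
    }

    /// Stand-in for `ParserImpl`/`Lexer`/`Source`: constructing it requires
    /// surrendering a `UniquePromise` by value.
    pub struct Inner<'a> {
        source_text: &'a str,
    }

    impl<'a> Inner<'a> {
        pub fn new(source_text: &'a str, _unique: UniquePromise) -> Self {
            Self { source_text }
        }

        pub fn run(self) -> usize {
            self.source_text.len()
        }
    }

    /// The one public entry point: mints a single token, then immediately
    /// consumes it by creating and running one `Inner`.
    pub fn parse(source_text: &str) -> usize {
        let unique = UniquePromise::new();
        Inner::new(source_text, unique).run()
    }
}

fn main() {
    println!("{}", parse_entry::parse("let x = 1;"));
    // `parse_entry::Inner::new(..)` does not compile out here:
    // there is no way to produce a `UniquePromise` outside the module.
}
```

Because the token is zero-sized and checked purely at compile time, there is no runtime cost, and each thread can still call the entry point independently, which is why parsing on separate threads is unaffected.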
overlookmotel authored and IWANABETHATGUY committed May 29, 2024
1 parent df72938 commit a74e1f8
Showing 7 changed files with 119 additions and 24 deletions.
4 changes: 4 additions & 0 deletions crates/oxc_parser/Cargo.toml
@@ -35,3 +35,7 @@ oxc_ast = { workspace = true, features = ["serde"] }
miette = { workspace = true, features = ["fancy-no-backtrace"] }
serde_json = { workspace = true }
ouroboros = "0.18.3" # for `multi-thread` example

[features]
# Expose Lexer for benchmarks
benchmarking = []
27 changes: 24 additions & 3 deletions crates/oxc_parser/src/lexer/mod.rs
@@ -44,7 +44,7 @@ pub use self::{
number::{parse_big_int, parse_float, parse_int},
token::Token,
};
use crate::diagnostics;
use crate::{diagnostics, UniquePromise};

#[derive(Debug, Clone, Copy)]
pub struct LexerCheckpoint<'a> {
@@ -97,8 +97,17 @@ pub struct Lexer<'a> {

#[allow(clippy::unused_self)]
impl<'a> Lexer<'a> {
pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self {
let source = Source::new(source_text);
/// Create new `Lexer`.
///
/// Requiring a `UniquePromise` to be provided guarantees only 1 `Lexer` can exist
/// on a single thread at one time.
pub(super) fn new(
allocator: &'a Allocator,
source_text: &'a str,
source_type: SourceType,
unique: UniquePromise,
) -> Self {
let source = Source::new(source_text, unique);

// The first token is at the start of the file, so it is allowed to be on a new line
let token = Token::new_on_new_line();
@@ -116,6 +125,18 @@ impl<'a> Lexer<'a> {
}
}

/// Backdoor to create a `Lexer` without holding a `UniquePromise`, for benchmarks.
/// This function must NOT be exposed in public API as it breaks safety invariants.
#[cfg(feature = "benchmarking")]
pub fn new_for_benchmarks(
allocator: &'a Allocator,
source_text: &'a str,
source_type: SourceType,
) -> Self {
let unique = UniquePromise::new_for_tests();
Self::new(allocator, source_text, source_type, unique)
}

/// Remaining string from `Chars`
pub fn remaining(&self) -> &'a str {
self.source.remaining()
8 changes: 6 additions & 2 deletions crates/oxc_parser/src/lexer/source.rs
@@ -1,6 +1,6 @@
#![allow(clippy::unnecessary_safety_comment)]

use crate::MAX_LEN;
use crate::{UniquePromise, MAX_LEN};

use std::{marker::PhantomData, slice, str};

@@ -72,7 +72,11 @@ pub(super) struct Source<'a> {

impl<'a> Source<'a> {
/// Create `Source` from `&str`.
pub(super) fn new(mut source_text: &'a str) -> Self {
///
/// Requiring a `UniquePromise` to be provided guarantees only 1 `Source` can exist
/// on a single thread at one time.
#[allow(clippy::needless_pass_by_value)]
pub(super) fn new(mut source_text: &'a str, _unique: UniquePromise) -> Self {
// If source text exceeds size limit, substitute a short source text which will fail to parse.
// `Parser::parse` will convert error to `diagnostics::OverlongSource`.
if source_text.len() > MAX_LEN {
95 changes: 80 additions & 15 deletions crates/oxc_parser/src/lib.rs
@@ -71,7 +71,13 @@ mod jsx;
mod ts;

mod diagnostics;

// Expose lexer only in benchmarks
#[cfg(not(feature = "benchmarking"))]
mod lexer;
#[cfg(feature = "benchmarking")]
#[doc(hidden)]
pub mod lexer;

use context::{Context, StatementContext};
use oxc_allocator::Allocator;
@@ -84,12 +90,6 @@ use crate::{
state::ParserState,
};

// Expose lexer for benchmarks
#[doc(hidden)]
pub mod __lexer {
pub use super::lexer::{Kind, Lexer, Token};
}

/// Maximum length of source which can be parsed (in bytes).
/// ~4 GiB on 64-bit systems, ~2 GiB on 32-bit systems.
// Length is constrained by 2 factors:
@@ -165,17 +165,63 @@ impl<'a> Parser<'a> {
self.options.preserve_parens = allow;
self
}
}

/// Main entry point
///
/// Returns an empty `Program` on unrecoverable error,
/// Recoverable errors are stored inside `errors`.
pub fn parse(self) -> ParserReturn<'a> {
let parser =
ParserImpl::new(self.allocator, self.source_text, self.source_type, self.options);
parser.parse()

mod parser_parse {
use super::*;

/// `UniquePromise` is a way to use the type system to enforce the invariant that only
/// a single `ParserImpl`, `Lexer` and `lexer::Source` can exist at any time on a thread.
/// This constraint is required to guarantee the soundness of some methods of these types
/// e.g. `Source::set_position`.
///
/// `ParserImpl::new`, `Lexer::new` and `lexer::Source::new` all require a `UniquePromise`
/// to be provided to them. `UniquePromise::new` is not visible outside this module, so only
/// `Parser::parse` can create one, and it only calls `ParserImpl::new` once.
/// This enforces the invariant throughout the entire parser.
///
/// `UniquePromise` is a zero-sized type and has no runtime cost. It's purely for the type-checker.
///
/// `UniquePromise::new_for_tests` is a backdoor for unit tests and benchmarks, so they can create a
/// `ParserImpl` or `Lexer`, and manipulate it directly, for testing/benchmarking purposes.
pub(crate) struct UniquePromise {
_dummy: (),
}

impl UniquePromise {
#[inline]
fn new() -> Self {
Self { _dummy: () }
}

/// Backdoor for tests/benchmarks to create a `UniquePromise` (see above).
/// This function must NOT be exposed outside of tests and benchmarks,
/// as it allows circumventing safety invariants of the parser.
#[cfg(any(test, feature = "benchmarking"))]
pub fn new_for_tests() -> Self {
Self { _dummy: () }
}
}

impl<'a> Parser<'a> {
/// Main entry point
///
/// Returns an empty `Program` on unrecoverable error,
/// Recoverable errors are stored inside `errors`.
pub fn parse(self) -> ParserReturn<'a> {
let unique = UniquePromise::new();
let parser = ParserImpl::new(
self.allocator,
self.source_text,
self.source_type,
self.options,
unique,
);
parser.parse()
}
}
}
use parser_parse::UniquePromise;

/// Implementation of parser.
/// `Parser` is just a public wrapper, the guts of the implementation is in this type.
@@ -213,15 +259,20 @@ struct ParserImpl<'a> {
}

impl<'a> ParserImpl<'a> {
/// Create a new parser
/// Create a new `ParserImpl`.
///
/// Requiring a `UniquePromise` to be provided guarantees only 1 `ParserImpl` can exist
/// on a single thread at one time.
#[inline]
pub fn new(
allocator: &'a Allocator,
source_text: &'a str,
source_type: SourceType,
options: ParserOptions,
unique: UniquePromise,
) -> Self {
Self {
lexer: Lexer::new(allocator, source_text, source_type),
lexer: Lexer::new(allocator, source_text, source_type, unique),
source_type,
source_text,
errors: vec![],
@@ -234,10 +285,24 @@ impl<'a> ParserImpl<'a> {
}
}

/// Backdoor to create a `ParserImpl` without holding a `UniquePromise`, for unit tests.
/// This function must NOT be exposed in public API as it breaks safety invariants.
#[cfg(test)]
fn new_for_tests(
allocator: &'a Allocator,
source_text: &'a str,
source_type: SourceType,
options: ParserOptions,
) -> Self {
let unique = UniquePromise::new_for_tests();
Self::new(allocator, source_text, source_type, options, unique)
}

/// Main entry point
///
/// Returns an empty `Program` on unrecoverable error,
/// Recoverable errors are stored inside `errors`.
#[inline]
pub fn parse(mut self) -> ParserReturn<'a> {
let (program, panicked) = match self.parse_program() {
Ok(program) => (program, false),
3 changes: 2 additions & 1 deletion crates/oxc_parser/src/ts/declaration.rs
@@ -85,7 +85,8 @@ mod test_is_declaration {
fn run_check(source: &str, expected: bool) {
let alloc = Allocator::default();
let source_type = SourceType::default().with_typescript(true);
let mut parser = ParserImpl::new(&alloc, source, source_type, ParserOptions::default());
let mut parser =
ParserImpl::new_for_tests(&alloc, source, source_type, ParserOptions::default());
// Get the parser to the first token.
parser.bump_any();
assert_eq!(expected, parser.at_start_of_ts_declaration());
2 changes: 1 addition & 1 deletion tasks/benchmark/Cargo.toml
@@ -51,7 +51,7 @@ harness = false
oxc_allocator = { workspace = true }
oxc_linter = { workspace = true }
oxc_minifier = { workspace = true }
oxc_parser = { workspace = true }
oxc_parser = { workspace = true, features = ["benchmarking"] }
oxc_prettier = { workspace = true }
oxc_semantic = { workspace = true }
oxc_span = { workspace = true }
4 changes: 2 additions & 2 deletions tasks/benchmark/benches/lexer.rs
@@ -1,6 +1,6 @@
use oxc_allocator::Allocator;
use oxc_benchmark::{criterion_group, criterion_main, BenchmarkId, Criterion};
use oxc_parser::__lexer::{Kind, Lexer};
use oxc_parser::lexer::{Kind, Lexer};
use oxc_span::SourceType;
use oxc_tasks_common::{TestFile, TestFiles};

@@ -32,7 +32,7 @@ fn bench_lexer(criterion: &mut Criterion) {
// Otherwise the allocator will allocate huge memory chunks (by power of two) from the
// system allocator, which makes time measurement unequal during long runs.
let allocator = Allocator::default();
let mut lexer = Lexer::new(&allocator, source_text, source_type);
let mut lexer = Lexer::new_for_benchmarks(&allocator, source_text, source_type);
while lexer.next_token().kind != Kind::Eof {}
allocator
});
