Skip to content

Commit

Permalink
refactor(parser): name byte handler functions (#2301)
Browse files Browse the repository at this point in the history
This PR solves the problem of lexer byte handlers all being called
`core::ops::function::FnOnce::call_once` in the flame graphs on
CodSpeed, by defining them as named functions instead of closures.

Pure refactor, no substantive changes.
  • Loading branch information
overlookmotel authored Feb 5, 2024
1 parent 018674c commit 9811c3a
Showing 1 changed file with 63 additions and 17 deletions.
80 changes: 63 additions & 17 deletions crates/oxc_parser/src/lexer/byte_handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::diagnostics;
/// * Lexer must not be at end of file.
/// * `byte` must be next byte of source code, corresponding to current position
/// of `lexer.current.chars`.
/// * Only `BYTE_HANDLERS` for ASCII characters may use the `ascii_byte_handler!()` macro.
/// * Only `BYTE_HANDLERS` for ASCII characters may use the `ascii_byte_handler!` macro.
pub(super) unsafe fn handle_byte(byte: u8, lexer: &mut Lexer) -> Kind {
    // `BYTE_HANDLERS` has 256 entries, one per possible `u8` value,
    // so the byte itself serves as the index into the table.
    let handler = BYTE_HANDLERS[usize::from(byte)];
    handler(lexer)
}
Expand Down Expand Up @@ -38,6 +38,41 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UER, UER, UER, UER, UER, UER, UER, UER, // F
];

/// Defines a byte handler as a `ByteHandler` constant backed by a named function.
///
/// Handlers for ASCII characters should be defined with the `ascii_byte_handler!` macro
/// instead, which adds optimizations for ASCII.
///
/// The handler body is placed in a real `fn` rather than a closure,
/// so that each handler appears under its own name in flame graphs.
///
/// For example:
///
/// ```
/// byte_handler!(UNI(lexer) {
///     lexer.unicode_char_handler()
/// });
/// ```
///
/// generates:
///
/// ```
/// const UNI: ByteHandler = {
///     #[allow(non_snake_case)]
///     fn UNI(lexer: &mut Lexer) -> Kind {
///         lexer.unicode_char_handler()
///     }
///     UNI
/// };
/// ```
macro_rules! byte_handler {
    ($name:ident($lexer:ident) $handler_body:expr) => {
        const $name: ByteHandler = {
            // The inner function intentionally shares the constant's upper-case name,
            // hence the lint exemption.
            #[allow(non_snake_case)]
            fn $name($lexer: &mut Lexer) -> Kind {
                $handler_body
            }
            // Evaluate to the function itself, which becomes the constant's value.
            $name
        };
    };
}

#[allow(clippy::unnecessary_safety_comment)]
/// Macro for defining byte handler for an ASCII character.
///
Expand All @@ -53,7 +88,8 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [
///
/// These assertions are unchecked (i.e. won't panic) and will cause UB if they're incorrect.
///
/// SAFETY: Only use this macro to define byte handlers for ASCII characters.
/// # Safety
/// Only use this macro to define byte handlers for ASCII characters.
///
/// ```
/// ascii_byte_handler!(SPS(lexer) {
Expand All @@ -65,20 +101,27 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [
/// expands to:
///
/// ```
/// const SPS: ByteHandler = |lexer| {
/// unsafe {
/// use assert_unchecked::assert_unchecked;
/// let s = lexer.current.chars.as_str();
/// assert_unchecked!(!s.is_empty());
/// assert_unchecked!(s.as_bytes()[0] < 128);
/// const SPS: ByteHandler = {
/// #[allow(non_snake_case)]
/// fn SPS(lexer: &mut Lexer) -> Kind {
/// // SAFETY: This macro is only used for ASCII characters
/// unsafe {
/// use assert_unchecked::assert_unchecked;
/// let s = lexer.current.chars.as_str();
/// assert_unchecked!(!s.is_empty());
/// assert_unchecked!(s.as_bytes()[0] < 128);
/// }
/// {
/// lexer.consume_char();
/// Kind::WhiteSpace
/// }
/// }
/// lexer.consume_char();
/// Kind::WhiteSpace
/// SPS
/// };
/// ```
macro_rules! ascii_byte_handler {
($id:ident($lex:ident) $body:expr) => {
const $id: ByteHandler = |$lex| {
byte_handler!($id($lex) {
// SAFETY: This macro is only used for ASCII characters
unsafe {
use assert_unchecked::assert_unchecked;
Expand All @@ -87,7 +130,7 @@ macro_rules! ascii_byte_handler {
assert_unchecked!(s.as_bytes()[0] < 128);
}
$body
};
});
};
}

Expand Down Expand Up @@ -590,14 +633,17 @@ ascii_byte_handler!(L_Y(lexer) match &lexer.identifier_name_handler()[1..] {
});

// Non-ASCII characters.
// NB: Must not use `ascii_byte_handler!()` macro, as this handler is for non-ASCII chars.
#[allow(clippy::redundant_closure_for_method_calls)]
const UNI: ByteHandler = |lexer| lexer.unicode_char_handler();
// NB: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII chars.
// All handling is delegated to the lexer's `unicode_char_handler` method.
byte_handler!(UNI(lex) {
    lex.unicode_char_handler()
});

// UTF-8 continuation bytes (128-191) (i.e. middle of a multi-byte UTF-8 sequence)
// + and byte values which are not legal in UTF-8 strings (248-255).
// `handle_byte()` should only be called with 1st byte of a valid UTF-8 char,
// so something has gone wrong if we get here.
// https://en.wikipedia.org/wiki/UTF-8
// NB: Must not use `ascii_byte_handler!()` macro, as this handler is for non-ASCII bytes.
const UER: ByteHandler = |_| unreachable!();
// NB: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII bytes.
// The lexer argument is unused: this handler only signals that it should never be reached.
byte_handler!(UER(_lexer) {
    unreachable!()
});

0 comments on commit 9811c3a

Please sign in to comment.