Skip to content

Commit

Permalink
perf(parser): lexer match byte not char (#2025)
Browse files Browse the repository at this point in the history
Two related changes to the lexer's `read_next_token()`:

1. Hint to the branch predictor that Unicode identifiers and non-standard
whitespace are rare by marking the function that handles that branch `#[cold]`.

2. The branch is on whether the next character is ASCII or not. This check
only requires reading 1 byte, as ASCII characters are always a single byte
in UTF-8. So the work of getting a `char` is now done only in the cold path,
once it's established that the character is not ASCII and this work is required.
  • Loading branch information
overlookmotel authored Jan 14, 2024
1 parent a356918 commit 60a927d
Showing 1 changed file with 19 additions and 16 deletions.
35 changes: 19 additions & 16 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,28 +374,31 @@ impl<'a> Lexer<'a> {
let offset = self.offset();
self.current.token.start = offset;

if let Some(c) = self.current.chars.clone().next() {
let kind = self.match_char(c);
if !matches!(
kind,
Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment
) {
return kind;
}
} else {
let remaining = self.current.chars.as_str();
if remaining.is_empty() {
return Kind::Eof;
}
}
}

#[inline]
fn match_char(&mut self, c: char) -> Kind {
let size = c as usize;
let byte = remaining.as_bytes()[0];
let kind = if byte < 128 {
BYTE_HANDLERS[byte as usize](self)
} else {
self.match_unicode_char()
};

if size < 128 {
return BYTE_HANDLERS[size](self);
if !matches!(
kind,
Kind::WhiteSpace | Kind::NewLine | Kind::Comment | Kind::MultiLineComment
) {
return kind;
}
}
}

// `#[cold]` to hint to branch predictor that unicode identifiers and irregular whitespace are rare
#[cold]
fn match_unicode_char(&mut self) -> Kind {
let c = self.current.chars.clone().next().unwrap();
match c {
c if is_id_start_unicode(c) => {
let mut builder = AutoCow::new(self);
Expand Down

0 comments on commit 60a927d

Please sign in to comment.