From c5796207013313bef8e8c25ec08bdaa51f4cc9ed Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Fri, 1 Mar 2024 13:23:34 +0000 Subject: [PATCH] refactor(parser): small efficiencies in `byte_search` macro usage (#2554) A few small efficiencies in usage of `byte_search` macro for lexing comments. --- crates/oxc_parser/src/lexer/comment.rs | 28 ++++++++++++++++---------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/crates/oxc_parser/src/lexer/comment.rs b/crates/oxc_parser/src/lexer/comment.rs index 1e46f109194ce..119960ea5f95f 100644 --- a/crates/oxc_parser/src/lexer/comment.rs +++ b/crates/oxc_parser/src/lexer/comment.rs @@ -38,7 +38,7 @@ impl<'a> Lexer<'a> { self.trivia_builder .add_single_line_comment(self.token.start, self.source.offset_of(pos)); // SAFETY: Safe to consume `\r` or `\n` as both are ASCII - unsafe { pos = pos.add(1) }; + pos = unsafe { pos.add(1) }; // We've found the end. Do not continue searching. false } else { @@ -48,14 +48,14 @@ impl<'a> Lexer<'a> { // SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char. // So safe to advance `pos` by 1 and read 2 bytes. let next2 = unsafe { pos.add(1).read2() }; - if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 { + if matches!(next2, LS_BYTES_2_AND_3 | PS_BYTES_2_AND_3) { // Irregular line break self.trivia_builder .add_single_line_comment(self.token.start, self.source.offset_of(pos)); // Advance `pos` to after this char. // SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char, // so consuming 3 bytes will place `pos` on next UTF-8 char boundary. - unsafe { pos = pos.add(3) }; + pos = unsafe { pos.add(3) }; // We've found the end. Do not continue searching. false } else { @@ -63,7 +63,7 @@ impl<'a> Lexer<'a> { // Skip 3 bytes (macro skips 1 already, so skip 2 here), and continue searching. // SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char, // so consuming 3 bytes will place `pos` on next UTF-8 char boundary. - unsafe { pos = pos.add(2) }; + pos = unsafe { pos.add(2) }; true } }) @@ -93,13 +93,15 @@ impl<'a> Lexer<'a> { continue_if: |next_byte, pos| { // Match found. Decide whether to continue searching. if next_byte == b'*' { - if pos.addr() < self.source.end_addr() - 1 { + // SAFETY: Next byte is `*` (ASCII) so after it is UTF-8 char boundary + let after_star = unsafe { pos.add(1) }; + if after_star.addr() < self.source.end_addr() { // If next byte isn't `/`, continue // SAFETY: Have checked there's at least 1 further byte to read - if unsafe { pos.add(1).read() } == b'/' { + if unsafe { after_star.read() } == b'/' { // Consume `*/` // SAFETY: Consuming `*/` leaves `pos` on a UTF-8 char boundary - unsafe { pos = pos.add(2) }; + pos = unsafe { pos.add(2) }; false } else { true @@ -115,8 +117,11 @@ impl<'a> Lexer<'a> { cold_branch(|| { // SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char. // So safe to advance `pos` by 1 and read 2 bytes. - let next2 = unsafe { pos.add(1).read2() }; - if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 { + let next2 = unsafe { + pos = pos.add(1); + pos.read2() + }; + if matches!(next2, LS_BYTES_2_AND_3 | PS_BYTES_2_AND_3) { // Irregular line break self.token.is_on_new_line = true; // Ideally we'd go on to `skip_multi_line_comment_after_line_break` here @@ -124,10 +129,11 @@ impl<'a> Lexer<'a> { // But irregular line breaks are rare anyway. } // Either way, continue searching. - // Skip 3 bytes (macro skips 1 already, so skip 2 here), and continue searching. + // Skip 3 bytes (skipped 1 byte above, macro skips 1 more, so skip 1 more here + // to make 3), and continue searching. // SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char, // so consuming 3 bytes will place `pos` on next UTF-8 char boundary. - unsafe { pos = pos.add(2) }; + pos = unsafe { pos.add(1) }; true }) } else {