refactor(parser): small efficiencies in byte_search macro usage (#2554

) A few small efficiencies in usage of `byte_search` macro for lexing comments.
oxc-project · Mar 1, 2024 · c579620 · c579620
1 parent 3d354d4
commit c579620
Showing 1 changed file with 17 additions and 11 deletions.
diff --git a/crates/oxc_parser/src/lexer/comment.rs b/crates/oxc_parser/src/lexer/comment.rs
@@ -38,7 +38,7 @@ impl<'a> Lexer<'a> {
                     self.trivia_builder
                         .add_single_line_comment(self.token.start, self.source.offset_of(pos));
                     // SAFETY: Safe to consume `\r` or `\n` as both are ASCII
-                    unsafe { pos = pos.add(1) };
+                    pos = unsafe { pos.add(1) };
                     // We've found the end. Do not continue searching.
                     false
                 } else {
@@ -48,22 +48,22 @@ impl<'a> Lexer<'a> {
                         // SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char.
                         // So safe to advance `pos` by 1 and read 2 bytes.
                         let next2 = unsafe { pos.add(1).read2() };
-                        if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
+                        if matches!(next2, LS_BYTES_2_AND_3 | PS_BYTES_2_AND_3) {
                             // Irregular line break
                             self.trivia_builder
                                 .add_single_line_comment(self.token.start, self.source.offset_of(pos));
                             // Advance `pos` to after this char.
                             // SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
                             // so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
-                            unsafe { pos = pos.add(3) };
+                            pos = unsafe { pos.add(3) };
                             // We've found the end. Do not continue searching.
                             false
                         } else {
                             // Some other Unicode char beginning with `0xE2`.
                             // Skip 3 bytes (macro skips 1 already, so skip 2 here), and continue searching.
                             // SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
                             // so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
-                            unsafe { pos = pos.add(2) };
+                            pos = unsafe { pos.add(2) };
                             true
                         }
                     })
@@ -93,13 +93,15 @@ impl<'a> Lexer<'a> {
             continue_if: |next_byte, pos| {
                 // Match found. Decide whether to continue searching.
                 if next_byte == b'*' {
-                    if pos.addr() < self.source.end_addr() - 1 {
+                    // SAFETY: Next byte is `*` (ASCII) so after it is UTF-8 char boundary
+                    let after_star = unsafe { pos.add(1) };
+                    if after_star.addr() < self.source.end_addr() {
                         // If next byte isn't `/`, continue
                         // SAFETY: Have checked there's at least 1 further byte to read
-                        if unsafe { pos.add(1).read() } == b'/' {
+                        if unsafe { after_star.read() } == b'/' {
                             // Consume `*/`
                             // SAFETY: Consuming `*/` leaves `pos` on a UTF-8 char boundary
-                            unsafe { pos = pos.add(2) };
+                            pos = unsafe { pos.add(2) };
                             false
                         } else {
                             true
@@ -115,19 +117,23 @@ impl<'a> Lexer<'a> {
                     cold_branch(|| {
                         // SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char.
                         // So safe to advance `pos` by 1 and read 2 bytes.
-                        let next2 = unsafe { pos.add(1).read2() };
-                        if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
+                        let next2 = unsafe {
+                            pos = pos.add(1);
+                            pos.read2()
+                        };
+                        if matches!(next2, LS_BYTES_2_AND_3 | PS_BYTES_2_AND_3) {
                             // Irregular line break
                             self.token.is_on_new_line = true;
                             // Ideally we'd go on to `skip_multi_line_comment_after_line_break` here
                             // but can't do that easily because can't use `return` in a closure.
                             // But irregular line breaks are rare anyway.
                         }
                         // Either way, continue searching.
-                        // Skip 3 bytes (macro skips 1 already, so skip 2 here), and continue searching.
+                        // Skip 3 bytes (skipped 1 byte above, macro skips 1 more, so skip 1 more here
+                        // to make 3), and continue searching.
                         // SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
                         // so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
-                        unsafe { pos = pos.add(2) };
+                        pos = unsafe { pos.add(1) };
                         true
                     })
                 } else {