From 99d11794200e1ae2e510ea7beab4c00d15c84914 Mon Sep 17 00:00:00 2001 From: Boshen Date: Sat, 13 Jan 2024 11:40:06 +0800 Subject: [PATCH] perf(parser): reduce Token size from 16 to 12 bytes --- crates/oxc_parser/src/lexer/mod.rs | 42 ++++++++++++---------------- crates/oxc_parser/src/lexer/token.rs | 13 ++++----- 2 files changed, 24 insertions(+), 31 deletions(-) diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index f563a64c7f72c4..d13f3c823899ee 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -11,6 +11,7 @@ mod string_builder; mod token; mod trivia_builder; +use rustc_hash::FxHashMap; use std::{collections::VecDeque, str::Chars}; use oxc_allocator::{Allocator, String}; @@ -30,7 +31,7 @@ pub use self::{ number::{parse_big_int, parse_float, parse_int}, token::Token, }; -use self::{string_builder::AutoCow, token::EscapedId, trivia_builder::TriviaBuilder}; +use self::{string_builder::AutoCow, trivia_builder::TriviaBuilder}; use crate::{diagnostics, MAX_LEN}; #[derive(Debug, Clone)] @@ -67,11 +68,12 @@ pub struct Lexer<'a> { pub(crate) trivia_builder: TriviaBuilder, - /// Data store for escaped strings, indexed by `Token.escaped_string_id` - escaped_strings: Vec<&'a str>, - /// Data store for escaped templates, indexed by `Token.escaped_string_id` + /// Data store for escaped strings, indexed by [Token::start] when [Token::escaped] is true + escaped_strings: FxHashMap<u32, &'a str>, + + /// Data store for escaped templates, indexed by [Token::start] when [Token::escaped] is true /// `None` is saved when the string contains an invalid escape sequence. 
- escaped_templates: Vec<Option<&'a str>>, + escaped_templates: FxHashMap<u32, Option<&'a str>>, } #[allow(clippy::unused_self)] @@ -97,8 +99,8 @@ impl<'a> Lexer<'a> { lookahead: VecDeque::with_capacity(4), // 4 is the maximum lookahead for TypeScript context: LexerContext::Regular, trivia_builder: TriviaBuilder::default(), - escaped_strings: vec![], - escaped_templates: vec![], + escaped_strings: FxHashMap::default(), + escaped_templates: FxHashMap::default(), } } @@ -309,21 +311,17 @@ impl<'a> Lexer<'a> { /// Save the string if it is escaped /// This reduces the overall memory consumption while keeping the `Token` size small /// Strings without escaped values can be retrieved as is from the token span - #[allow(clippy::cast_possible_truncation)] fn save_string(&mut self, has_escape: bool, s: &'a str) { if !has_escape { return; } - self.escaped_strings.push(s); - let escaped_string_id = self.escaped_strings.len() as u32; - // SAFETY: escaped_string_id is the length of `self.escaped_strings` after an item is pushed, which can never be 0 - let escaped_string_id = unsafe { EscapedId::new_unchecked(escaped_string_id) }; - self.current.token.escaped_id.replace(escaped_string_id); + self.escaped_strings.insert(self.current.token.start, s); + self.current.token.escaped = true; } pub(crate) fn get_string(&self, token: Token) -> &'a str { - if let Some(escaped_id) = token.escaped_id { - return self.escaped_strings[escaped_id.get() as usize - 1]; + if token.escaped { + return self.escaped_strings[&token.start]; } let raw = &self.source[token.start as usize..token.end as usize]; @@ -336,7 +334,6 @@ impl<'a> Lexer<'a> { } /// Save the template if it is escaped - #[allow(clippy::cast_possible_truncation)] fn save_template_string( &mut self, is_valid_escape_sequence: bool, @@ -346,18 +343,15 @@ impl<'a> Lexer<'a> { if !has_escape { return; } - self.escaped_templates.push(is_valid_escape_sequence.then(|| s)); - let escaped_template_id = self.escaped_templates.len() as u32; - // SAFETY: escaped_string_id is the 
length of `self.escaped_strings` after an item is pushed, which can never be 0 - let escaped_template_id = unsafe { EscapedId::new_unchecked(escaped_template_id) }; - self.current.token.escaped_id.replace(escaped_template_id); + self.escaped_templates + .insert(self.current.token.start, is_valid_escape_sequence.then(|| s)); + self.current.token.escaped = true; } pub(crate) fn get_template_string(&self, token: Token) -> Option<&'a str> { - if let Some(escaped_id) = token.escaped_id { - return self.escaped_templates[escaped_id.get() as usize - 1]; + if token.escaped { + return self.escaped_templates[&token.start]; } - let raw = &self.source[token.start as usize..token.end as usize]; Some(match token.kind { Kind::NoSubstitutionTemplate | Kind::TemplateTail => { diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index a3fdae4f58b831..bd07135c849fee 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -4,8 +4,6 @@ use oxc_span::Span; use super::kind::Kind; -pub type EscapedId = std::num::NonZeroU32; - #[derive(Debug, Clone, Copy, Default)] pub struct Token { /// Token Kind @@ -20,14 +18,15 @@ pub struct Token { /// Indicates the token is on a newline pub is_on_new_line: bool, - /// A index handle to `Lexer::escaped_strings` or `Lexer::escaped_templates` - /// See https://floooh.github.io/2018/06/17/handles-vs-pointers.html for some background reading - pub escaped_id: Option<EscapedId>, + /// True if the identifier / string / template kinds has escaped strings. + /// The escaped strings are saved in [Lexer::escaped_strings] and [Lexer::escaped_templates] by + /// [Token::start] + pub escaped: bool, } #[cfg(target_pointer_width = "64")] mod size_asserts { - oxc_index::assert_eq_size!(super::Token, [u8; 16]); + oxc_index::assert_eq_size!(super::Token, [u8; 12]); } impl Token { @@ -36,6 +35,6 @@ impl Token { } pub fn escaped(&self) -> bool { - self.escaped_id.is_some() + self.escaped } }