Skip to content

Commit

Permalink
perf(parser): reduce Token size from 16 to 12 bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
Boshen committed Jan 13, 2024
1 parent 6996948 commit 99d1179
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 31 deletions.
42 changes: 18 additions & 24 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ mod string_builder;
mod token;
mod trivia_builder;

use rustc_hash::FxHashMap;
use std::{collections::VecDeque, str::Chars};

use oxc_allocator::{Allocator, String};
Expand All @@ -30,7 +31,7 @@ pub use self::{
number::{parse_big_int, parse_float, parse_int},
token::Token,
};
use self::{string_builder::AutoCow, token::EscapedId, trivia_builder::TriviaBuilder};
use self::{string_builder::AutoCow, trivia_builder::TriviaBuilder};
use crate::{diagnostics, MAX_LEN};

#[derive(Debug, Clone)]
Expand Down Expand Up @@ -67,11 +68,12 @@ pub struct Lexer<'a> {

pub(crate) trivia_builder: TriviaBuilder,

/// Data store for escaped strings, indexed by `Token.escaped_string_id`
escaped_strings: Vec<&'a str>,
/// Data store for escaped templates, indexed by `Token.escaped_string_id`
/// Data store for escaped strings, indexed by [Token::start] when [Token::escaped] is true
escaped_strings: FxHashMap<u32, &'a str>,

/// Data store for escaped templates, indexed by [Token::start] when [Token::escaped] is true
/// `None` is saved when the string contains an invalid escape sequence.
escaped_templates: Vec<Option<&'a str>>,
escaped_templates: FxHashMap<u32, Option<&'a str>>,
}

#[allow(clippy::unused_self)]
Expand All @@ -97,8 +99,8 @@ impl<'a> Lexer<'a> {
lookahead: VecDeque::with_capacity(4), // 4 is the maximum lookahead for TypeScript
context: LexerContext::Regular,
trivia_builder: TriviaBuilder::default(),
escaped_strings: vec![],
escaped_templates: vec![],
escaped_strings: FxHashMap::default(),
escaped_templates: FxHashMap::default(),
}
}

Expand Down Expand Up @@ -309,21 +311,17 @@ impl<'a> Lexer<'a> {
/// Save the string if it is escaped.
/// This reduces the overall memory consumption while keeping the `Token` size small.
/// Strings without escaped values can be retrieved as is from the token span.
#[allow(clippy::cast_possible_truncation)]
fn save_string(&mut self, has_escape: bool, s: &'a str) {
if !has_escape {
return;
}
self.escaped_strings.push(s);
let escaped_string_id = self.escaped_strings.len() as u32;
// SAFETY: escaped_string_id is the length of `self.escaped_strings` after an item is pushed, which can never be 0
let escaped_string_id = unsafe { EscapedId::new_unchecked(escaped_string_id) };
self.current.token.escaped_id.replace(escaped_string_id);
self.escaped_strings.insert(self.current.token.start, s);
self.current.token.escaped = true;
}

pub(crate) fn get_string(&self, token: Token) -> &'a str {
if let Some(escaped_id) = token.escaped_id {
return self.escaped_strings[escaped_id.get() as usize - 1];
if token.escaped {
return self.escaped_strings[&token.start];
}

let raw = &self.source[token.start as usize..token.end as usize];
Expand All @@ -336,7 +334,6 @@ impl<'a> Lexer<'a> {
}

/// Save the template if it is escaped
#[allow(clippy::cast_possible_truncation)]
fn save_template_string(
&mut self,
is_valid_escape_sequence: bool,
Expand All @@ -346,18 +343,15 @@ impl<'a> Lexer<'a> {
if !has_escape {
return;
}
self.escaped_templates.push(is_valid_escape_sequence.then(|| s));
let escaped_template_id = self.escaped_templates.len() as u32;
// SAFETY: escaped_template_id is the length of `self.escaped_templates` after an item is pushed, which can never be 0
let escaped_template_id = unsafe { EscapedId::new_unchecked(escaped_template_id) };
self.current.token.escaped_id.replace(escaped_template_id);
self.escaped_templates
.insert(self.current.token.start, is_valid_escape_sequence.then(|| s));
self.current.token.escaped = true;
}

pub(crate) fn get_template_string(&self, token: Token) -> Option<&'a str> {
if let Some(escaped_id) = token.escaped_id {
return self.escaped_templates[escaped_id.get() as usize - 1];
if token.escaped {
return self.escaped_templates[&token.start];
}

let raw = &self.source[token.start as usize..token.end as usize];
Some(match token.kind {
Kind::NoSubstitutionTemplate | Kind::TemplateTail => {
Expand Down
13 changes: 6 additions & 7 deletions crates/oxc_parser/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ use oxc_span::Span;

use super::kind::Kind;

pub type EscapedId = std::num::NonZeroU32;

#[derive(Debug, Clone, Copy, Default)]
pub struct Token {
/// Token Kind
Expand All @@ -20,14 +18,15 @@ pub struct Token {
/// Indicates the token is on a newline
pub is_on_new_line: bool,

/// An index handle to `Lexer::escaped_strings` or `Lexer::escaped_templates`
/// See https://floooh.github.io/2018/06/17/handles-vs-pointers.html for some background reading
pub escaped_id: Option<EscapedId>,
/// True if the identifier / string / template kind has escaped strings.
/// The escaped strings are saved in [Lexer::escaped_strings] and [Lexer::escaped_templates],
/// keyed by [Token::start].
pub escaped: bool,
}

#[cfg(target_pointer_width = "64")]
mod size_asserts {
oxc_index::assert_eq_size!(super::Token, [u8; 16]);
oxc_index::assert_eq_size!(super::Token, [u8; 12]);
}

impl Token {
Expand All @@ -36,6 +35,6 @@ impl Token {
}

pub fn escaped(&self) -> bool {
self.escaped_id.is_some()
self.escaped
}
}

0 comments on commit 99d1179

Please sign in to comment.