From 8d4b0efcc3fed9354666f1c637af973e5854a1f7 Mon Sep 17 00:00:00 2001 From: Boshen Date: Sun, 4 Feb 2024 17:38:09 +0800 Subject: [PATCH] feat(span): fix memory leak by implementing inlineable string for oxc_allocator closes #1803 This `String` is currently unsafe, but I won't to get miri working before introducing more changes. --- Cargo.lock | 40 ++----- Cargo.toml | 1 - .../oxc_semantic/src/module_record/builder.rs | 9 +- crates/oxc_span/Cargo.toml | 13 +- crates/oxc_span/src/atom.rs | 111 +++++++++++++----- 5 files changed, 104 insertions(+), 70 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e4ab38164ad9fa..9babc6adefae88 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -207,15 +207,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" -[[package]] -name = "castaway" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc" -dependencies = [ - "rustversion", -] - [[package]] name = "cc" version = "1.0.83" @@ -315,20 +306,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "compact_str" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f86b9c4c00838774a6d902ef931eff7470720c51d90c2e32cfe15dc304737b3f" -dependencies = [ - "castaway", - "cfg-if", - "itoa", - "ryu", - "serde", - "static_assertions", -] - [[package]] name = "console" version = "0.15.8" @@ -862,6 +839,15 @@ dependencies = [ "serde", ] +[[package]] +name = "inlinable_string" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" +dependencies = [ + "serde", +] + [[package]] name = "insta" version = "1.34.0" @@ -1724,7 +1710,7 @@ dependencies = [ name = "oxc_span" version = "0.6.0" dependencies = [ - "compact_str", + "inlinable_string", "miette", "serde", "tsify", @@ -2256,12 +2242,6 @@ dependencies = [ "untrusted", ] -[[package]] -name = "rustversion" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" - [[package]] name = "ryu" version = "1.0.16" diff --git a/Cargo.toml b/Cargo.toml index c7432dcecca6a4..e546d0cf4b03b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,7 +88,6 @@ assert-unchecked = { version = "0.1.2" } bpaf = { version = "0.9.9" } bitflags = { version = "2.4.2" } bumpalo = { version = "3.14.0" } -compact_str = { version = "0.7.1" } convert_case = { version = "0.6.0" } criterion = { version = "0.5.1", default-features = false } crossbeam-channel = { version = "0.5.11" } diff --git a/crates/oxc_semantic/src/module_record/builder.rs b/crates/oxc_semantic/src/module_record/builder.rs index 465774ace982fe..3503e477f604a6 100644 --- a/crates/oxc_semantic/src/module_record/builder.rs +++ b/crates/oxc_semantic/src/module_record/builder.rs @@ -283,11 +283,10 @@ impl ModuleRecordBuilder { }; let export_entry = ExportEntry { export_name: ExportExportName::Default(exported_name.span()), - local_name: id - .as_ref() - .map_or(ExportLocalName::Default(exported_name.span()), |ident| { - ExportLocalName::Name(NameSpan::new(ident.name.clone(), ident.span)) - }), + local_name: id.as_ref().map_or_else( + || ExportLocalName::Default(exported_name.span()), + |ident| ExportLocalName::Name(NameSpan::new(ident.name.clone(), ident.span)), + ), span: decl.declaration.span(), ..ExportEntry::default() }; diff --git a/crates/oxc_span/Cargo.toml b/crates/oxc_span/Cargo.toml index e5168cb60b08e5..c4b067f16a9927 100644 --- a/crates/oxc_span/Cargo.toml +++ b/crates/oxc_span/Cargo.toml @@ -18,16 +18,17 @@ workspace = true [lib] doctest = false -[features] -default = [] -serde = ["dep:serde", "compact_str/serde"] -wasm = ["dep:tsify", "dep:wasm-bindgen"] - [dependencies] miette = { workspace = true } -compact_str = { workspace = true } +inlinable_string = "0.1.15" tsify = { workspace = true, optional = true } wasm-bindgen = { workspace = true, optional = true } serde = { workspace = true, features = ["derive"], optional = true } + +[features] +default = [] +serde = ["dep:serde", "inlinable_string/serde"] +wasm = ["dep:tsify", "dep:wasm-bindgen"] + diff --git a/crates/oxc_span/src/atom.rs b/crates/oxc_span/src/atom.rs index 1d3a5ca2fa9953..f2935ea8a0a067 100644 --- a/crates/oxc_span/src/atom.rs +++ b/crates/oxc_span/src/atom.rs @@ -1,17 +1,15 @@ use std::{ borrow::{Borrow, Cow}, - fmt, + fmt, hash, ops::Deref, }; -use compact_str::CompactString; #[cfg(feature = "serde")] -use serde::Serialize; +use serde::{Serialize, Serializer}; -/// Newtype for [`CompactString`] -#[derive(Clone, Eq, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize))] -pub struct Atom(CompactString); +use inlinable_string::inline_string::{InlineString, INLINE_STRING_CAPACITY}; + +const BASE54_CHARS: &[u8; 64] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789"; #[cfg_attr( all(feature = "serde", feature = "wasm"), @@ -22,16 +20,57 @@ const TS_APPEND_CONTENT: &'static str = r#" export type Atom = string; "#; -const BASE54_CHARS: &[u8; 64] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789"; +/// An inlinable string for oxc_allocator. +/// +/// SAFETY: It is unsafe to use this string after the allocator is dropped. +/// +#[derive(Clone, Eq, Hash)] +pub struct Atom(AtomImpl); + +/// Immutable Inlinable String +/// +/// https://github.com/fitzgen/inlinable_string/blob/master/src/lib.rs +#[derive(Clone, Eq, PartialEq)] +enum AtomImpl { + /// A arena heap-allocated string. + Arena(&'static str), + /// A heap-allocated string. + Heap(String), + /// A small string stored inline. + Inline(InlineString), +} + +#[cfg(feature = "serde")] +impl Serialize for Atom { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(self.as_str()) + } +} impl Atom { - pub const fn new_inline(s: &str) -> Self { - Self(CompactString::new_inline(s)) + pub fn new_inline(s: &str) -> Self { + Self(AtomImpl::Inline(InlineString::from(s))) } #[inline] pub fn as_str(&self) -> &str { - self.0.as_str() + match &self.0 { + AtomImpl::Arena(s) => s, + AtomImpl::Heap(s) => s, + AtomImpl::Inline(s) => s.as_ref(), + } + } + + #[inline] + pub fn into_string(self) -> String { + match self.0 { + AtomImpl::Arena(s) => String::from(s), + AtomImpl::Heap(s) => s, + AtomImpl::Inline(s) => s.to_string(), + } } /// Get the shortest mangled name for a given n. @@ -41,7 +80,7 @@ impl Atom { // Base 54 at first because these are the usable first characters in JavaScript identifiers // let base = 54usize; - let mut ret = CompactString::default(); + let mut ret = String::new(); ret.push(BASE54_CHARS[num % base] as char); num /= base; // Base 64 for the rest because after the first character we can also use 0-9 too @@ -52,7 +91,18 @@ impl Atom { ret.push(BASE54_CHARS[num % base] as char); num /= base; } - Self(ret) + Self(AtomImpl::Heap(ret)) + } +} + +impl<'a> From<&'a str> for Atom { + fn from(s: &'a str) -> Self { + if s.len() <= INLINE_STRING_CAPACITY { + Self(AtomImpl::Inline(InlineString::from(s))) + } else { + // SAFETY: It is unsafe to use this string after the allocator is dropped. + Self(AtomImpl::Arena(unsafe { std::mem::transmute(s) })) + } } } @@ -60,62 +110,67 @@ impl Deref for Atom { type Target = str; fn deref(&self) -> &Self::Target { - self.0.as_str() - } -} - -impl<'a> From<&'a str> for Atom { - fn from(s: &'a str) -> Self { - Self(s.into()) + self.as_str() } } impl From for Atom { fn from(s: String) -> Self { - Self(s.into()) + Self(AtomImpl::Heap(s)) } } impl From> for Atom { fn from(s: Cow<'_, str>) -> Self { - Self(s.into()) + Self(AtomImpl::Heap(s.into())) } } impl AsRef for Atom { #[inline] fn as_ref(&self) -> &str { - self.0.as_str() + self.as_str() } } impl Borrow for Atom { #[inline] fn borrow(&self) -> &str { - self.0.as_str() + self.as_str() } } impl> PartialEq for Atom { fn eq(&self, other: &T) -> bool { - self.0.as_str() == other.as_ref() + self.as_str() == other.as_ref() } } impl PartialEq for &str { fn eq(&self, other: &Atom) -> bool { - *self == other.0.as_str() + *self == other.as_str() + } +} + +impl hash::Hash for AtomImpl { + #[inline] + fn hash(&self, hasher: &mut H) { + match self { + Self::Arena(s) => s.hash(hasher), + Self::Heap(s) => s.hash(hasher), + Self::Inline(s) => s.hash(hasher), + } } } impl fmt::Debug for Atom { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(self.0.as_str(), f) + fmt::Debug::fmt(self.as_str(), f) } } impl fmt::Display for Atom { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self.0.as_str(), f) + fmt::Display::fmt(self.as_str(), f) } }