Skip to content

Commit

Permalink
feat(allocator): Vec<u8>::into_string (#8017)
Browse files Browse the repository at this point in the history
Bumpalo has a method `String::from_utf8_unchecked` to convert a `Vec<u8>` to a `String`. However, we can't use it, because we use Bumpalo's `String` as our arena string type, but allocator_api2's `Vec` as our arena vec.

Provide the same functionality in a way that works around this incompatibility. Also use the faster `simdutf8` to check that the `Vec` contains a valid UTF-8 string.
  • Loading branch information
overlookmotel committed Dec 19, 2024
1 parent 0deb9e6 commit 75b775c
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 1 deletion.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/oxc_allocator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ doctest = false
[dependencies]
allocator-api2 = { workspace = true }
bumpalo = { workspace = true, features = ["allocator-api2", "collections"] }
simdutf8 = { workspace = true }

serde = { workspace = true, optional = true }

Expand Down
35 changes: 34 additions & 1 deletion crates/oxc_allocator/src/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ use allocator_api2::vec;
use bumpalo::Bump;
#[cfg(any(feature = "serialize", test))]
use serde::{ser::SerializeSeq, Serialize, Serializer};
use simdutf8::basic::{from_utf8, Utf8Error};

use crate::{Allocator, Box};
use crate::{Allocator, Box, String};

/// A `Vec` without [`Drop`], which stores its data in the arena allocator.
///
Expand Down Expand Up @@ -180,6 +181,38 @@ impl<'alloc, T> Vec<'alloc, T> {
}
}

impl<'alloc> Vec<'alloc, u8> {
    /// Consume this `Vec<u8>` and return its contents as a [`String`],
    /// after validating that the bytes are UTF-8.
    ///
    /// # Errors
    /// Returns [`Err`] if the bytes held by the `Vec` are not a valid UTF-8 string.
    pub fn into_string(self) -> Result<String<'alloc>, Utf8Error> {
        // Validate the bytes. Only success or failure matters here;
        // on failure the error is propagated to the caller unchanged.
        from_utf8(&self.0)?;
        // SAFETY: Validation above succeeded, so the bytes are a valid UTF-8 string
        let string = unsafe { self.into_string_unchecked() };
        Ok(string)
    }

    /// Consume this `Vec<u8>` and return its contents as a [`String`],
    /// skipping the check that the bytes are a valid UTF-8 string.
    ///
    /// The conversion happens in place: the existing pointer, length, capacity and allocator
    /// are handed to the `String`, so the contents of the `Vec` are not copied.
    ///
    /// # SAFETY
    /// Caller must ensure this `Vec<u8>` comprises a valid UTF-8 string.
    #[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
    #[inline] // `#[inline]` because this is a no-op at runtime
    pub unsafe fn into_string_unchecked(self) -> String<'alloc> {
        // `bumpalo::String::from_utf8_unchecked` is not usable here, as it accepts
        // a `bumpalo::collections::Vec`, whereas our inner `Vec` is an `allocator_api2::vec::Vec`.
        // Instead, take the `Vec` apart and rebuild a `String` from the same raw parts.
        // SAFETY: Conversion is safe because both types store data in arena in same way.
        // Lifetime of returned `String` is same as lifetime of original `Vec<u8>`.
        let (data, length, capacity, allocator) =
            ManuallyDrop::into_inner(self.0).into_raw_parts_with_alloc();
        String::from_raw_parts_in(data, length, capacity, allocator)
    }
}

impl<'alloc, T> ops::Deref for Vec<'alloc, T> {
type Target = vec::Vec<T, &'alloc Bump>;

Expand Down

0 comments on commit 75b775c

Please sign in to comment.