From ecf7b654e0a86da12b2e280218ba019d9149f9e4 Mon Sep 17 00:00:00 2001 From: Boshen Date: Fri, 6 Dec 2024 12:46:30 +0800 Subject: [PATCH] feat(napi/parser): introduce experimental magic string --- Cargo.lock | 60 +++++++++-- Cargo.toml | 4 +- napi/parser/Cargo.toml | 3 + napi/parser/bindings.js | 2 + napi/parser/index.d.ts | 40 ++++++-- napi/parser/index.js | 42 ++++++-- napi/parser/src/lib.rs | 21 ++-- napi/parser/src/magic_string.rs | 141 ++++++++++++++++++++++++++ napi/parser/src/types.rs | 44 ++++++-- napi/parser/test/magic_string.test.ts | 26 +++++ 10 files changed, 347 insertions(+), 36 deletions(-) create mode 100644 napi/parser/src/magic_string.rs create mode 100644 napi/parser/test/magic_string.test.ts diff --git a/Cargo.lock b/Cargo.lock index a5dd636362e965..ce13c57cf6a957 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1516,7 +1516,7 @@ dependencies = [ "bitflags 2.6.0", "itertools", "nonmax", - "oxc_index", + "oxc_index 2.0.0", "oxc_syntax", "petgraph", "rustc-hash", @@ -1534,10 +1534,10 @@ dependencies = [ "nonmax", "oxc_allocator", "oxc_ast", - "oxc_index", + "oxc_index 2.0.0", "oxc_mangler", "oxc_parser", - "oxc_sourcemap", + "oxc_sourcemap 1.0.3", "oxc_span", "oxc_syntax", "pico-args", @@ -1616,6 +1616,15 @@ dependencies = [ "serde", ] +[[package]] +name = "oxc_index" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f004e73d346bec03a428ca26cf2df245d08563f1d3268b7bcbd5554fc1db05c5" +dependencies = [ + "rayon", +] + [[package]] name = "oxc_index" version = "2.0.0" @@ -1692,7 +1701,7 @@ dependencies = [ "oxc_codegen", "oxc_diagnostics", "oxc_ecmascript", - "oxc_index", + "oxc_index 2.0.0", "oxc_macros", "oxc_parser", "oxc_regular_expression", @@ -1730,7 +1739,7 @@ version = "0.39.0" dependencies = [ "itertools", "oxc_ast", - "oxc_index", + "oxc_index 2.0.0", "oxc_semantic", "oxc_span", ] @@ -1826,7 +1835,9 @@ dependencies = [ "oxc_ast", "oxc_napi", "rustc-hash", + "self_cell", "serde_json", + "string_wizard", ] [[package]] @@ -1917,7 +1928,7 @@ dependencies = [ "oxc_cfg", "oxc_diagnostics", "oxc_ecmascript", - "oxc_index", + "oxc_index 2.0.0", "oxc_parser", "oxc_span", "oxc_syntax", @@ -1929,6 +1940,20 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "oxc_sourcemap" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6889e95c8db9298201db5389810d61ca491443863897f557c0ce47f961541cde" +dependencies = [ + "base64-simd", + "cfg-if", + "cow-utils", + "rustc-hash", + "serde", + "serde_json", +] + [[package]] name = "oxc_sourcemap" version = "1.0.3" @@ -1969,7 +1994,7 @@ dependencies = [ "oxc_allocator", "oxc_ast_macros", "oxc_estree", - "oxc_index", + "oxc_index 2.0.0", "oxc_span", "phf", "rustc-hash", @@ -2026,7 +2051,7 @@ dependencies = [ "napi-derive", "oxc", "oxc_napi", - "oxc_sourcemap", + "oxc_sourcemap 1.0.3", "rustc-hash", ] @@ -2085,7 +2110,7 @@ version = "0.0.0" dependencies = [ "console_error_panic_hook", "oxc", - "oxc_index", + "oxc_index 2.0.0", "oxc_linter", "oxc_prettier", "serde", @@ -2578,6 +2603,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "self_cell" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2fdfc24bc566f839a2da4c4295b82db7d25a24253867d5c64355abb5799bdbe" + [[package]] name = "semver" version = "1.0.23" @@ -2786,6 +2817,17 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9557cb6521e8d009c51a8666f09356f4b817ba9ba0981a305bd86aee47bd35c" +[[package]] +name = "string_wizard" +version = "0.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "651f4a6ed8df932ab9285b892fdd35bdc72b17b5eff0232cd51048a0b28af4a5" +dependencies = [ + "oxc_index 1.0.1", + "oxc_sourcemap 0.38.0", + "rustc-hash", +] + [[package]] name = "subtle" version = "2.6.1" diff --git a/Cargo.toml b/Cargo.toml index 0cec2be84804d2..5b27a6bf1c7a7f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -123,7 +123,7 @@ unicode-id-start = "1" oxc-browserslist = "1.1.0" oxc_index = "2" oxc_resolver = "2.1.1" -oxc_sourcemap = "1.0.3" +oxc_sourcemap = "1" # allocator-api2 = "0.2.21" @@ -180,6 +180,7 @@ rustc-hash = "2.*" ryu-js = "1.0.1" saphyr = "0.0.3" schemars = "0.8.21" +self_cell = "1.0.4" seq-macro = "0.3.5" serde = "1.0.215" serde-wasm-bindgen = "0.6.5" @@ -187,6 +188,7 @@ serde_json = "1.0.133" sha1 = "0.10.6" simdutf8 = { version = "0.1.5", features = ["aarch64_neon"] } similar = "2.6.0" +string_wizard = "0.0.23" tempfile = "3.14.0" tokio = "1.42.0" tower-lsp = "0.20.0" diff --git a/napi/parser/Cargo.toml b/napi/parser/Cargo.toml index f17bec23d04dc4..a4d767d43642da 100644 --- a/napi/parser/Cargo.toml +++ b/napi/parser/Cargo.toml @@ -26,7 +26,10 @@ oxc_ast = { workspace = true, features = ["serialize"] } # enable feature only oxc_napi = { workspace = true } rustc-hash = { workspace = true } +self_cell = { workspace = true } serde_json = { workspace = true } +string_wizard = { workspace = true, features = ["sourcemap"] } +# oxc_sourcemap = { workspace = true, features = ["napi"] } napi = { workspace = true, features = ["async"] } napi-derive = { workspace = true } diff --git a/napi/parser/bindings.js b/napi/parser/bindings.js index 2865f9494222eb..b03d2e44ef17ab 100644 --- a/napi/parser/bindings.js +++ b/napi/parser/bindings.js @@ -361,6 +361,8 @@ if (!nativeBinding) { throw new Error(`Failed to load native binding`) } +module.exports.MagicString = nativeBinding.MagicString +module.exports.ParseResult = nativeBinding.ParseResult module.exports.ExportExportNameKind = nativeBinding.ExportExportNameKind module.exports.ExportImportNameKind = nativeBinding.ExportImportNameKind module.exports.ExportLocalNameKind = nativeBinding.ExportLocalNameKind diff --git a/napi/parser/index.d.ts b/napi/parser/index.d.ts index 45cafbe02e3d24..ce9d17e1788d59 100644 --- a/napi/parser/index.d.ts +++ b/napi/parser/index.d.ts @@ -2,6 +2,29 @@ /* eslint-disable */ export * from '@oxc-project/types'; +export declare class MagicString { + getSourceText(start: number, end: number): string + length(): number + toString(): string + append(input: string): this + appendLeft(index: number, input: string): this + appendRight(index: number, input: string): this + indent(): this + prepend(input: string): this + prependLeft(index: number, input: string): this + prependRight(index: number, input: string): this + relocate(start: number, end: number, to: number): this + remove(start: number, end: number): this +} + +export declare class ParseResult { + get program(): import("@oxc-project/types").Program + get module(): EcmaScriptModule + get comments(): Array + get errors(): Array + get magicString(): MagicString +} + export interface Comment { type: 'Line' | 'Block' value: string @@ -108,6 +131,10 @@ export declare const enum ImportNameKind { Default = 'Default' } +export interface OverwriteOptions { + contentOnly: boolean +} + /** * Parse asynchronously. * @@ -115,13 +142,6 @@ export declare const enum ImportNameKind { */ export declare function parseAsync(filename: string, sourceText: string, options?: ParserOptions | undefined | null): Promise -export interface ParseResult { - program: import("@oxc-project/types").Program - module: EcmaScriptModule - comments: Array - errors: Array -} - export interface ParserOptions { sourceType?: 'script' | 'module' | 'unambiguous' | undefined /** Treat the source text as `js`, `jsx`, `ts`, or `tsx`. */ @@ -154,6 +174,12 @@ export declare const enum Severity { Advice = 'Advice' } +export interface SourceMapOptions { + includeContent?: boolean + source?: string + hires?: boolean +} + export interface Span { start: number end: number diff --git a/napi/parser/index.js b/napi/parser/index.js index ff5b26bdbda4d5..2cdb1fe05ed240 100644 --- a/napi/parser/index.js +++ b/napi/parser/index.js @@ -1,14 +1,44 @@ const bindings = require('./bindings.js'); +module.exports.MagicString = bindings.MagicString; +module.exports.ParseResult = bindings.ParseResult; +module.exports.ExportExportNameKind = bindings.ExportExportNameKind; +module.exports.ExportImportNameKind = bindings.ExportImportNameKind; +module.exports.ExportLocalNameKind = bindings.ExportLocalNameKind; +module.exports.ImportNameKind = bindings.ImportNameKind; module.exports.parseWithoutReturn = bindings.parseWithoutReturn; +module.exports.Severity = bindings.Severity; + +function wrap(result) { + let program, module, comments, errors, magicString; + return { + get program() { + if (!program) program = JSON.parse(result.program); + return program; + }, + get module() { + if (!module) module = result.module; + return module; + }, + get comments() { + if (!comments) comments = result.comments; + return comments; + }, + get errors() { + if (!errors) errors = result.errors; + return errors; + }, + get magicString() { + if (!magicString) magicString = result.magicString; + return magicString; + }, + }; +} module.exports.parseAsync = async function parseAsync(...args) { - const result = await bindings.parseAsync(...args); - result.program = JSON.parse(result.program); - return result; + return wrap(await bindings.parseAsync(...args)); }; + module.exports.parseSync = function parseSync(...args) { - const result = bindings.parseSync(...args); - result.program = JSON.parse(result.program); - return result; + return wrap(bindings.parseSync(...args)); }; diff --git a/napi/parser/src/lib.rs b/napi/parser/src/lib.rs index ab028921eb3ae7..9596451cd81cec 100644 --- a/napi/parser/src/lib.rs +++ b/napi/parser/src/lib.rs @@ -3,8 +3,11 @@ )] mod convert; +mod magic_string; mod types; +use std::mem; + use napi::{bindgen_prelude::AsyncTask, Task}; use napi_derive::napi; @@ -16,7 +19,10 @@ use oxc::{ }; use oxc_napi::Error; -pub use crate::types::{Comment, EcmaScriptModule, ParseResult, ParserOptions}; +pub use crate::{ + magic_string::MagicString, + types::{Comment, EcmaScriptModule, ParseResult, ParserOptions}, +}; fn get_source_type(filename: &str, options: &ParserOptions) -> SourceType { match options.lang.as_deref() { @@ -62,10 +68,10 @@ pub fn parse_without_return(filename: String, source_text: String, options: Opti parse(&allocator, source_type, &source_text, &options); } -fn parse_with_return(filename: &str, source_text: &str, options: &ParserOptions) -> ParseResult { +fn parse_with_return(filename: &str, source_text: String, options: &ParserOptions) -> ParseResult { let allocator = Allocator::default(); let source_type = get_source_type(filename, options); - let ret = parse(&allocator, source_type, source_text, options); + let ret = parse(&allocator, source_type, &source_text, options); let program = serde_json::to_string(&ret.program).unwrap(); let errors = ret.errors.into_iter().map(Error::from).collect::>(); @@ -79,14 +85,14 @@ fn parse_with_return(filename: &str, source_text: &str, options: &ParserOptions) CommentKind::Line => String::from("Line"), CommentKind::Block => String::from("Block"), }, - value: comment.content_span().source_text(source_text).to_string(), + value: comment.content_span().source_text(&source_text).to_string(), start: comment.span.start, end: comment.span.end, }) .collect::>(); let module = EcmaScriptModule::from(&ret.module_record); - ParseResult { program, module, comments, errors } + ParseResult { source_text, program, module, comments, errors } } /// Parse synchronously. @@ -97,7 +103,7 @@ pub fn parse_sync( options: Option, ) -> ParseResult { let options = options.unwrap_or_default(); - parse_with_return(&filename, &source_text, &options) + parse_with_return(&filename, source_text, &options) } pub struct ResolveTask { @@ -112,7 +118,8 @@ impl Task for ResolveTask { type Output = ParseResult; fn compute(&mut self) -> napi::Result { - Ok(parse_with_return(&self.filename, &self.source_text, &self.options)) + let source_text = mem::take(&mut self.source_text); + Ok(parse_with_return(&self.filename, source_text, &self.options)) } fn resolve(&mut self, _: napi::Env, result: Self::Output) -> napi::Result { diff --git a/napi/parser/src/magic_string.rs b/napi/parser/src/magic_string.rs new file mode 100644 index 00000000000000..dd650d41d013fb --- /dev/null +++ b/napi/parser/src/magic_string.rs @@ -0,0 +1,141 @@ +#![allow(clippy::cast_possible_truncation)] +// use std::sync::Arc; + +use napi_derive::napi; + +// use oxc_sourcemap::napi::SourceMap; +use self_cell::self_cell; +use string_wizard::MagicString as MS; + +#[napi] +pub struct MagicString { + cell: MagicStringImpl, +} + +self_cell!( + struct MagicStringImpl { + owner: String, + #[covariant] + dependent: MS, + } +); + +impl MagicString { + pub fn new(source_text: String) -> Self { + Self { cell: MagicStringImpl::new(source_text, |s| string_wizard::MagicString::new(s)) } + } +} + +#[napi(object)] +pub struct OverwriteOptions { + pub content_only: bool, +} + +#[napi(object)] +pub struct SourceMapOptions { + pub include_content: Option, + pub source: Option, + pub hires: Option, +} + +#[napi] +impl MagicString { + #[napi] + pub fn get_source_text(&self, start: u32, end: u32) -> &str { + &self.cell.borrow_owner()[start as usize..end as usize] + } + + #[napi] + pub fn length(&self) -> u32 { + self.cell.borrow_dependent().len() as u32 + } + + #[napi] + #[allow(clippy::inherent_to_string)] + pub fn to_string(&self) -> String { + self.cell.borrow_dependent().to_string() + } + + // #[napi] + // pub fn source_map(&self, options: Option) -> SourceMap { + // let options = options.map(|o| string_wizard::SourceMapOptions { + // include_content: o.include_content.unwrap_or_default(), + // source: o.source.map(Arc::from).unwrap_or_default(), + // hires: o.hires.unwrap_or_default(), + // }); + // let map = self.cell.borrow_dependent().source_map(options.unwrap_or_default()); + // oxc_sourcemap::napi::SourceMap::from(map) + // } + + #[napi] + pub fn append(&mut self, input: String) -> &Self { + self.cell.with_dependent_mut(|_, ms| { + ms.append(input); + }); + self + } + + #[napi] + pub fn append_left(&mut self, index: u32, input: String) -> &Self { + self.cell.with_dependent_mut(|_, ms| { + ms.append_left(index as usize, input); + }); + self + } + + #[napi] + pub fn append_right(&mut self, index: u32, input: String) -> &Self { + self.cell.with_dependent_mut(|_, ms| { + ms.append_right(index as usize, input); + }); + self + } + + #[napi] + pub fn indent(&mut self) -> &Self { + self.cell.with_dependent_mut(|_, ms| { + ms.indent(); + }); + self + } + + #[napi] + pub fn prepend(&mut self, input: String) -> &Self { + self.cell.with_dependent_mut(|_, ms| { + ms.prepend(input); + }); + self + } + + #[napi] + pub fn prepend_left(&mut self, index: u32, input: String) -> &Self { + self.cell.with_dependent_mut(|_, ms| { + ms.prepend_left(index as usize, input); + }); + self + } + + #[napi] + pub fn prepend_right(&mut self, index: u32, input: String) -> &Self { + self.cell.with_dependent_mut(|_, ms| { + ms.prepend_right(index as usize, input); + }); + self + } + + #[napi] + pub fn relocate(&mut self, start: u32, end: u32, to: u32) -> &Self { + self.cell.with_dependent_mut(|_, ms| { + ms.relocate(start as usize, end as usize, to as usize); + }); + self + } + + #[napi] + pub fn remove(&mut self, start: u32, end: u32) -> &Self { + self.cell.with_dependent_mut(|_, ms| { + ms.remove(start as usize, end as usize); + }); + self + } +} diff --git a/napi/parser/src/types.rs b/napi/parser/src/types.rs index 9b80619972536e..3212e3414f5265 100644 --- a/napi/parser/src/types.rs +++ b/napi/parser/src/types.rs @@ -1,7 +1,10 @@ use napi_derive::napi; +use std::mem; use oxc_napi::Error; +use crate::magic_string::MagicString; + #[napi(object)] #[derive(Default)] pub struct ParserOptions { @@ -22,13 +25,41 @@ pub struct ParserOptions { pub preserve_parens: Option, } -#[napi(object)] +#[napi] pub struct ParseResult { - #[napi(ts_type = "import(\"@oxc-project/types\").Program")] - pub program: String, - pub module: EcmaScriptModule, - pub comments: Vec, - pub errors: Vec, + pub(crate) source_text: String, + pub(crate) program: String, + pub(crate) module: EcmaScriptModule, + pub(crate) comments: Vec, + pub(crate) errors: Vec, +} + +#[napi] +impl ParseResult { + #[napi(getter, ts_return_type = "import(\"@oxc-project/types\").Program")] + pub fn get_program(&mut self) -> String { + mem::take(&mut self.program) + } + + #[napi(getter)] + pub fn module(&mut self) -> EcmaScriptModule { + mem::take(&mut self.module) + } + + #[napi(getter)] + pub fn comments(&mut self) -> Vec { + mem::take(&mut self.comments) + } + + #[napi(getter)] + pub fn errors(&mut self) -> Vec { + mem::take(&mut self.errors) + } + + #[napi(getter)] + pub fn magic_string(&mut self) -> MagicString { + MagicString::new(mem::take(&mut self.source_text)) + } } #[napi(object)] @@ -41,6 +72,7 @@ pub struct Comment { } #[napi(object)] +#[derive(Default)] pub struct EcmaScriptModule { /// Has ESM syntax. /// diff --git a/napi/parser/test/magic_string.test.ts b/napi/parser/test/magic_string.test.ts new file mode 100644 index 00000000000000..0ff02e5f3e9cfb --- /dev/null +++ b/napi/parser/test/magic_string.test.ts @@ -0,0 +1,26 @@ +import { expect, describe, it } from 'vitest'; + +import type { StringLiteral, VariableDeclaration } from '../index.js'; +import { parseSync } from '../index.js'; + +describe('simple', () => { + const code = 'const s: String = "测试"'; + + it('calls magic string APIs', () => { + // `oxc` holds a magic string instance on the Rust side. + const { program, magicString: ms } = parseSync('test.ts', code); + const declaration = program.body[0] as VariableDeclaration; + const stringLiteral = declaration.declarations[0].init as StringLiteral; + + // These spans are in utf8 offsets. + const start = stringLiteral.start + 1; + const end = stringLiteral.end - 1; + + // Access source text by utf8 offset. + expect(ms.getSourceText(start, end)).toEqual('测试'); + + // Magic string manipulation. + ms.remove(start, end).append(';'); + expect(ms.toString()).toEqual('const s: String = "";'); + }); +});