From 52a41804df71628e5d93b8d1dddc1a37025226c1 Mon Sep 17 00:00:00 2001 From: Boshen Date: Tue, 10 Dec 2024 17:21:37 +0800 Subject: [PATCH] feat(data_structures): add rope --- Cargo.lock | 4 +- crates/oxc_data_structures/Cargo.toml | 1 + crates/oxc_data_structures/src/lib.rs | 1 + .../src/rope.rs} | 43 +++++++++---------- crates/oxc_language_server/Cargo.toml | 2 +- crates/oxc_language_server/src/linter.rs | 42 ++++++++---------- crates/oxc_transformer/Cargo.toml | 1 - crates/oxc_transformer/src/jsx/jsx_source.rs | 26 ++++++----- crates/oxc_transformer/src/jsx/mod.rs | 1 - 9 files changed, 58 insertions(+), 63 deletions(-) rename crates/{oxc_transformer/src/jsx/utils.rs => oxc_data_structures/src/rope.rs} (67%) diff --git a/Cargo.lock b/Cargo.lock index ce13c57cf6a957..5357028946330a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1588,6 +1588,7 @@ name = "oxc_data_structures" version = "0.39.0" dependencies = [ "assert-unchecked", + "ropey", ] [[package]] @@ -1663,12 +1664,12 @@ dependencies = [ "ignore", "log", "oxc_allocator", + "oxc_data_structures", "oxc_diagnostics", "oxc_linter", "oxc_parser", "oxc_semantic", "oxc_span", - "ropey", "rustc-hash", "serde", "serde_json", @@ -2081,7 +2082,6 @@ dependencies = [ "oxc_syntax", "oxc_traverse", "pico-args", - "ropey", "rustc-hash", "serde", "serde_json", diff --git a/crates/oxc_data_structures/Cargo.toml b/crates/oxc_data_structures/Cargo.toml index 6f9bc0af26ab30..694705e4eae539 100644 --- a/crates/oxc_data_structures/Cargo.toml +++ b/crates/oxc_data_structures/Cargo.toml @@ -22,3 +22,4 @@ doctest = false [dependencies] assert-unchecked = { workspace = true } +ropey = { workspace = true } diff --git a/crates/oxc_data_structures/src/lib.rs b/crates/oxc_data_structures/src/lib.rs index 1a3c20dca0f6c6..518ca178a6ed2d 100644 --- a/crates/oxc_data_structures/src/lib.rs +++ b/crates/oxc_data_structures/src/lib.rs @@ -1,3 +1,4 @@ //! Data structures used across other oxc crates. #![warn(missing_docs)] +pub mod rope; pub mod stack; diff --git a/crates/oxc_transformer/src/jsx/utils.rs b/crates/oxc_data_structures/src/rope.rs similarity index 67% rename from crates/oxc_transformer/src/jsx/utils.rs rename to crates/oxc_data_structures/src/rope.rs index 12a1ebc2eeee3e..ff6f39f70ac169 100644 --- a/crates/oxc_transformer/src/jsx/utils.rs +++ b/crates/oxc_data_structures/src/rope.rs @@ -1,96 +1,93 @@ -use ropey::Rope; - -/// Get line and column from offset and source text. -/// -/// Line number starts at 1. -/// Column number is in UTF-16 characters, and starts at 1. -/// -/// This matches Babel's output. -pub fn get_line_column(rope: &Rope, offset: u32, source_text: &str) -> (usize, usize) { +//! Rope + +pub use ropey::Rope; + +/// Get UTF16 line and column from UTF8 offset and source text. +#[expect(clippy::cast_possible_truncation)] +pub fn get_line_column(rope: &Rope, offset: u32, source_text: &str) -> (u32, u32) { let offset = offset as usize; // Get line number and byte offset of start of line let line_index = rope.byte_to_line(offset); let line_offset = rope.line_to_byte(line_index); // Get column number let column_index = source_text[line_offset..offset].encode_utf16().count(); - // line and column are zero-indexed, but we want 1-indexed - (line_index + 1, column_index + 1) + (line_index as u32, column_index as u32) } #[cfg(test)] mod test { use ropey::Rope; - fn test_line_column(offset: u32, source_text: &str) -> (usize, usize) { + fn test_line_column(offset: u32, source_text: &str) -> (u32, u32) { let rope = Rope::from_str(source_text); super::get_line_column(&rope, offset, source_text) } #[test] fn empty_file() { - assert_eq!(test_line_column(0, ""), (1, 1)); + assert_eq!(test_line_column(0, ""), (0, 0)); } #[test] fn first_line_start() { - assert_eq!(test_line_column(0, "foo\nbar\n"), (1, 1)); + assert_eq!(test_line_column(0, "foo\nbar\n"), (0, 0)); } #[test] fn first_line_middle() { - assert_eq!(test_line_column(5, "blahblahblah\noops\n"), (1, 6)); + assert_eq!(test_line_column(5, "blahblahblah\noops\n"), (0, 5)); } #[test] fn later_line_start() { - assert_eq!(test_line_column(8, "foo\nbar\nblahblahblah"), (3, 1)); + assert_eq!(test_line_column(8, "foo\nbar\nblahblahblah"), (2, 0)); } #[test] fn later_line_middle() { - assert_eq!(test_line_column(12, "foo\nbar\nblahblahblah"), (3, 5)); + assert_eq!(test_line_column(12, "foo\nbar\nblahblahblah"), (2, 4)); } #[test] fn after_2_byte_unicode() { assert_eq!("£".len(), 2); assert_eq!(utf16_len("£"), 1); - assert_eq!(test_line_column(4, "£abc"), (1, 4)); + assert_eq!(test_line_column(4, "£abc"), (0, 3)); } #[test] fn after_3_byte_unicode() { assert_eq!("अ".len(), 3); assert_eq!(utf16_len("अ"), 1); - assert_eq!(test_line_column(5, "अabc"), (1, 4)); + assert_eq!(test_line_column(5, "अabc"), (0, 3)); } #[test] fn after_4_byte_unicode() { assert_eq!("🍄".len(), 4); assert_eq!(utf16_len("🍄"), 2); - assert_eq!(test_line_column(6, "🍄abc"), (1, 5)); + assert_eq!(test_line_column(6, "🍄abc"), (0, 4)); } #[test] fn after_2_byte_unicode_on_previous_line() { assert_eq!("£".len(), 2); assert_eq!(utf16_len("£"), 1); - assert_eq!(test_line_column(4, "£\nabc"), (2, 2)); + assert_eq!(test_line_column(4, "£\nabc"), (1, 1)); } #[test] fn after_3_byte_unicode_on_previous_line() { assert_eq!("अ".len(), 3); assert_eq!(utf16_len("अ"), 1); - assert_eq!(test_line_column(5, "अ\nabc"), (2, 2)); + assert_eq!(test_line_column(5, "अ\nabc"), (1, 1)); } #[test] fn after_4_byte_unicode_on_previous_line() { assert_eq!("🍄".len(), 4); assert_eq!(utf16_len("🍄"), 2); - assert_eq!(test_line_column(6, "🍄\nabc"), (2, 2)); + assert_eq!(test_line_column(6, "🍄\nabc"), (1, 1)); } #[cfg(test)] diff --git a/crates/oxc_language_server/Cargo.toml b/crates/oxc_language_server/Cargo.toml index c821f6af988f7c..c76be25af33e6b 100644 --- a/crates/oxc_language_server/Cargo.toml +++ b/crates/oxc_language_server/Cargo.toml @@ -23,6 +23,7 @@ doctest = false [dependencies] oxc_allocator = { workspace = true } +oxc_data_structures = { workspace = true } oxc_diagnostics = { workspace = true } oxc_linter = { workspace = true } oxc_parser = { workspace = true } @@ -36,7 +37,6 @@ futures = { workspace = true } globset = { workspace = true } ignore = { workspace = true, features = ["simd-accel"] } log = { workspace = true } -ropey = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } diff --git a/crates/oxc_language_server/src/linter.rs b/crates/oxc_language_server/src/linter.rs index 01c5f4998c4970..e1d91b8ccc9d2a 100644 --- a/crates/oxc_language_server/src/linter.rs +++ b/crates/oxc_language_server/src/linter.rs @@ -1,5 +1,3 @@ -use cow_utils::CowUtils; -use oxc_linter::loader::LINT_PARTIAL_LOADER_EXT; use std::{ fs, path::{Path, PathBuf}, @@ -8,9 +6,18 @@ use std::{ sync::{Arc, OnceLock}, }; +use cow_utils::CowUtils; use log::debug; +use rustc_hash::FxHashSet; +use tower_lsp::lsp_types::{ + self, CodeDescription, DiagnosticRelatedInformation, DiagnosticSeverity, NumberOrString, + Position, Range, Url, +}; + use oxc_allocator::Allocator; +use oxc_data_structures::rope::{get_line_column, Rope}; use oxc_diagnostics::{Error, NamedSource, Severity}; +use oxc_linter::loader::LINT_PARTIAL_LOADER_EXT; use oxc_linter::{ loader::{JavaScriptSource, Loader}, FixKind, Linter, ModuleRecord, @@ -18,12 +25,6 @@ use oxc_linter::{ use oxc_parser::{ParseOptions, Parser}; use oxc_semantic::SemanticBuilder; use oxc_span::VALID_EXTENSIONS; -use ropey::Rope; -use rustc_hash::FxHashSet; -use tower_lsp::lsp_types::{ - self, CodeDescription, DiagnosticRelatedInformation, DiagnosticSeverity, NumberOrString, - Position, Range, Url, -}; const LINT_DOC_LINK_PREFIX: &str = "https://oxc.rs/docs/guide/usage/linter/rules"; #[derive(Debug)] @@ -53,13 +54,11 @@ impl ErrorWithPosition { let labels_with_pos: Vec = labels .iter() .map(|labeled_span| LabeledSpanWithPosition { - start_pos: offset_to_position(labeled_span.offset() + start, text) - .unwrap_or_default(), + start_pos: offset_to_position(labeled_span.offset() + start, text), end_pos: offset_to_position( labeled_span.offset() + start + labeled_span.len(), text, - ) - .unwrap_or_default(), + ), message: labeled_span.label().map(ToString::to_string), }) .collect(); @@ -304,13 +303,11 @@ impl IsolatedLintHandler { start: offset_to_position( (f.span.start + start) as usize, source_text.as_str(), - ) - .unwrap_or_default(), + ), end: offset_to_position( (f.span.end + start) as usize, source_text.as_str(), - ) - .unwrap_or_default(), + ), }, }); @@ -359,16 +356,11 @@ impl IsolatedLintHandler { } #[allow(clippy::cast_possible_truncation)] -fn offset_to_position(offset: usize, source_text: &str) -> Option { +fn offset_to_position(offset: usize, source_text: &str) -> Position { + // TODO(perf): share a single instance of `Rope` let rope = Rope::from_str(source_text); - // Get line number and byte offset of start of line - let line_index = rope.try_byte_to_line(offset).ok()?; - let line_offset = rope.try_line_to_byte(line_index).ok()?; - - // Get column number - let column_index = source_text[line_offset..offset].encode_utf16().count(); - - Some(Position::new(line_index as u32, column_index as u32)) + let (line, column) = get_line_column(&rope, offset as u32, source_text); + Position::new(line, column) } pub struct ServerLinter { diff --git a/crates/oxc_transformer/Cargo.toml b/crates/oxc_transformer/Cargo.toml index 6a21fc190e7d6e..a83e3906fe558c 100644 --- a/crates/oxc_transformer/Cargo.toml +++ b/crates/oxc_transformer/Cargo.toml @@ -41,7 +41,6 @@ dashmap = { workspace = true } indexmap = { workspace = true } itoa = { workspace = true } lazy_static = { workspace = true } -ropey = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } diff --git a/crates/oxc_transformer/src/jsx/jsx_source.rs b/crates/oxc_transformer/src/jsx/jsx_source.rs index 92bed1f3c122d7..3c2180972ec148 100644 --- a/crates/oxc_transformer/src/jsx/jsx_source.rs +++ b/crates/oxc_transformer/src/jsx/jsx_source.rs @@ -33,7 +33,7 @@ //! //! * Babel plugin implementation: -use ropey::Rope; +use oxc_data_structures::rope::{get_line_column, Rope}; use oxc_ast::ast::*; use oxc_diagnostics::OxcDiagnostic; @@ -43,8 +43,6 @@ use oxc_traverse::{BoundIdentifier, Traverse, TraverseCtx}; use crate::TransformCtx; -use super::utils::get_line_column; - const SOURCE: &str = "__source"; const FILE_NAME_VAR: &str = "jsxFileName"; @@ -77,16 +75,24 @@ impl<'a, 'ctx> Traverse<'a> for JsxSource<'a, 'ctx> { } impl<'a, 'ctx> JsxSource<'a, 'ctx> { - pub fn get_line_column(&mut self, offset: u32) -> (usize, usize) { + /// Get line and column from offset and source text. + /// + /// Line number starts at 1. + /// Column number is in UTF-16 characters, and starts at 1. + /// + /// This matches Babel's output. + pub fn get_line_column(&mut self, offset: u32) -> (u32, u32) { let source_rope = self.source_rope.get_or_insert_with(|| Rope::from_str(self.ctx.source_text)); - get_line_column(source_rope, offset, self.ctx.source_text) + let (line, column) = get_line_column(source_rope, offset, self.ctx.source_text); + // line and column are zero-indexed, but we want 1-indexed + (line + 1, column + 1) } pub fn get_object_property_kind_for_jsx_plugin( &mut self, - line: usize, - column: usize, + line: u32, + column: u32, ctx: &mut TraverseCtx<'a>, ) -> ObjectPropertyKind<'a> { let kind = PropertyKind::Init; @@ -136,11 +142,11 @@ impl<'a, 'ctx> JsxSource<'a, 'ctx> { elem.attributes.push(attribute_item); } - #[allow(clippy::cast_precision_loss)] + #[expect(clippy::cast_lossless)] pub fn get_source_object( &mut self, - line: usize, - column: usize, + line: u32, + column: u32, ctx: &mut TraverseCtx<'a>, ) -> Expression<'a> { let kind = PropertyKind::Init; diff --git a/crates/oxc_transformer/src/jsx/mod.rs b/crates/oxc_transformer/src/jsx/mod.rs index 75db0642141c0a..f216ab6c0f6ace 100644 --- a/crates/oxc_transformer/src/jsx/mod.rs +++ b/crates/oxc_transformer/src/jsx/mod.rs @@ -11,7 +11,6 @@ mod jsx_self; mod jsx_source; mod options; mod refresh; -mod utils; use refresh::ReactRefresh; pub use display_name::ReactDisplayName;