Skip to content

Commit

Permalink
perf(sourcemap): improve perf of search_original_line_and_column (#…
Browse files Browse the repository at this point in the history
…7926)

this seems to show a decent speed up on perf.
the thinking behind this change is:
1. subsequent calls to `search_original_line_and_column` will **never**
look at back at lines it has already searched
2. binary search is faster in this case as typically `idx` is only
increased by a small amount

Given this, we can skip checking a bunch of lines (that will never
return true) improving performance.

This is a very hot path. 


```
$ hyperfine --warmup=3 ./target/release-with-debug/examples/sourcemap-main ./target/release-with-debug/examples/sourcemap
Benchmark 1: ./target/release-with-debug/examples/sourcemap-main
  Time (mean ± σ):      79.5 ms ±   5.5 ms    [User: 65.1 ms, System: 12.2 ms]
  Range (min … max):    76.8 ms … 111.7 ms    37 runs
 
  Warning: Statistical outliers were detected. Consider re-running this benchmark on a quiet system without any interferences from other programs. It might help to use the '--warmup' or '--prepare' options.
 
Benchmark 2: ./target/release-with-debug/examples/sourcemap
  Time (mean ± σ):      72.9 ms ±   0.9 ms    [User: 59.2 ms, System: 12.2 ms]
  Range (min … max):    70.6 ms …  75.0 ms    40 runs
 
Summary
  ./target/release-with-debug/examples/sourcemap ran
    1.09 ± 0.08 times faster than ./target/release-with-debug/examples/sourcemap-main
```

---------

Co-authored-by: overlookmotel <[email protected]>
  • Loading branch information
camc314 and overlookmotel authored Dec 23, 2024
1 parent f79e9ad commit 78d2e83
Showing 1 changed file with 157 additions and 7 deletions.
164 changes: 157 additions & 7 deletions crates/oxc_codegen/src/sourcemap_builder.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{path::Path, sync::Arc};
use std::{cmp::max, path::Path, sync::Arc};

use nonmax::NonMaxU32;
use oxc_index::{Idx, IndexVec};
Expand All @@ -11,6 +11,9 @@ const LS_OR_PS_SECOND: u8 = 0x80;
const LS_THIRD: u8 = 0xA8;
const PS_THIRD: u8 = 0xA9;

/// Number of lines to check with linear search when translating byte position to line index
const LINE_SEARCH_LINEAR_ITERATIONS: usize = 16;

/// Index into vec of `ColumnOffsets`
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct ColumnOffsetsId(NonMaxU32);
Expand Down Expand Up @@ -71,6 +74,10 @@ pub struct SourcemapBuilder {
sourcemap_builder: oxc_sourcemap::SourceMapBuilder,
generated_line: u32,
generated_column: u32,
/// Tracks the last accessed line index to optimize sequential lookups in `search_original_line_and_column`.
/// Most calls to this method access positions in increasing order (e.g., when mapping source tokens linearly),
/// so we can avoid unnecessary binary searches by advancing linearly from this cached index.
last_line_lookup: u32,
}

impl SourcemapBuilder {
Expand All @@ -88,6 +95,7 @@ impl SourcemapBuilder {
sourcemap_builder,
generated_line: 0,
generated_column: 0,
last_line_lookup: 0,
}
}

Expand Down Expand Up @@ -129,13 +137,13 @@ impl SourcemapBuilder {
self.last_position = Some(position);
}

#[allow(clippy::cast_possible_truncation)]
#[expect(clippy::cast_possible_truncation)]
fn search_original_line_and_column(&mut self, position: u32) -> (u32, u32) {
let result = self
.line_offset_tables
.lines
.partition_point(|table| table.byte_offset_to_start_of_line <= position);
let original_line = if result > 0 { result - 1 } else { 0 };
let original_line = self.search_original_line(position);

// Store line index as starting point for next search
self.last_line_lookup = original_line as u32;

let line = &self.line_offset_tables.lines[original_line];
let mut original_column = position - line.byte_offset_to_start_of_line;
if let Some(column_offsets_id) = line.column_offsets_id {
Expand All @@ -148,6 +156,100 @@ impl SourcemapBuilder {
(original_line as u32, original_column)
}

/// Find line index for byte index `position`, using line offset table.
///
/// Usually output code is roughly in same order as it was in original source,
/// so line will be close to the line found in last call to this function.
///
/// So do fast linear search first over a few lines, and fallback to slower binary search
/// if this doesn't find the line.
fn search_original_line(&self, position: u32) -> usize {
let lines = &self.line_offset_tables.lines;
let idx = self.last_line_lookup as usize;

if position >= lines[idx].byte_offset_to_start_of_line {
self.search_original_line_forwards(position)
} else {
self.search_original_line_backwards(position)
}
}

/// Find line index for byte index `position`, starting search at `last_line_lookup`,
/// and working forwards.
///
/// Search forwards, looking for first line which starts *after* `position`.
/// `position` then must be on the line before that one.
fn search_original_line_forwards(&self, position: u32) -> usize {
let lines = &self.line_offset_tables.lines;
let last_idx = self.last_line_lookup as usize;

let start_idx = last_idx + 1;
let end_idx = start_idx + LINE_SEARCH_LINEAR_ITERATIONS;

// We have a fast path for when there are more than `LINE_SEARCH_LINEAR_ITERATIONS` lines
// to search (common case unless file is very short).
// Fast path is do linear search on first `LINE_SEARCH_LINEAR_ITERATIONS` lines,
// then fallback to binary search over remaining lines.
// If less than `LINE_SEARCH_LINEAR_ITERATIONS` lines to search, just do linear search,
// but that's slower as number of lines being searched is not constant, so loop cannot be unrolled.
if end_idx > lines.len() {
// Less than `LINE_SEARCH_LINEAR_ITERATIONS` lines to search. Take slow path.
// Unless file is very short, this branch is rarely taken.
return self.search_original_line_forwards_when_few_lines(position);
}

// Linear search for `LINE_SEARCH_LINEAR_ITERATIONS` lines.
// Compiler should unroll this loop as it has constant number of iterations.
// https://godbolt.org/z/heh1cnYa4
for (line_idx, line) in lines[start_idx..end_idx].iter().enumerate() {
if line.byte_offset_to_start_of_line > position {
// This line starts after `position`. `position` must be on previous line.
return start_idx + line_idx - 1;
}
}

// Line not found yet. Binary search over remaining lines.
lines[end_idx..].partition_point(|line| line.byte_offset_to_start_of_line <= position)
+ end_idx
- 1
}

#[cold]
fn search_original_line_forwards_when_few_lines(&self, position: u32) -> usize {
let lines = &self.line_offset_tables.lines;
let last_idx = self.last_line_lookup as usize;
let start_idx = last_idx + 1;

for (line_idx, line) in lines[start_idx..].iter().enumerate() {
if line.byte_offset_to_start_of_line > position {
// This line starts after `position`. `position` must be on previous line.
return start_idx + line_idx - 1;
}
}

// No line starts after `position`. `position` must be on last line.
lines.len() - 1
}

fn search_original_line_backwards(&self, position: u32) -> usize {
let lines = &self.line_offset_tables.lines;
let mut idx = self.last_line_lookup as usize;

let cap = idx.saturating_sub(16);
idx = max(idx.saturating_sub(1), cap);
while idx > cap && lines[idx].byte_offset_to_start_of_line > position {
idx -= 1;
}

if lines[idx].byte_offset_to_start_of_line < position {
idx = lines
.partition_point(|table| table.byte_offset_to_start_of_line <= position)
.saturating_sub(1);
}

idx
}

#[allow(clippy::cast_possible_truncation)]
fn update_generated_line_and_column(&mut self, output: &[u8]) {
let remaining = &output[self.last_generated_update..];
Expand Down Expand Up @@ -463,4 +565,52 @@ mod test {
let sm = builder.into_sourcemap();
assert_eq!(sm.get_tokens().count(), 2);
}

static SOURCE: &str = "a\nbc\ndef";
static MAPPINGS: &[(u32, u32)] = &[
(0, 0), // 'a'
(0, 1), // '\n'
(1, 0), // 'b'
(1, 1), // 'c'
(1, 2), // '\n'
(2, 0), // 'd'
(2, 1), // 'e'
(2, 2), // 'f'
(2, 3), // EOF
];

#[test]
fn test_search_original_line_and_column_sequential() {
let mut builder = SourcemapBuilder::new(Path::new("x.js"), SOURCE);

#[expect(clippy::cast_possible_truncation)]
for (pos, (expected_line, expected_col)) in MAPPINGS.iter().copied().enumerate() {
let (line, col) = builder.search_original_line_and_column(pos as u32);
assert_eq!((line, col), (expected_line, expected_col), "Mismatch at position {pos}");
}
}

#[test]
fn test_search_original_line_and_column_reverse_sequential() {
let mut builder = SourcemapBuilder::new(Path::new("x.js"), SOURCE);

#[expect(clippy::cast_possible_truncation)]
for (pos, (expected_line, expected_col)) in MAPPINGS.iter().copied().enumerate().rev() {
let (line, col) = builder.search_original_line_and_column(pos as u32);
assert_eq!((line, col), (expected_line, expected_col), "Mismatch at position {pos}");
}
}

#[test]
fn test_search_original_line_and_column_non_sequential() {
let mut builder = SourcemapBuilder::new(Path::new("x.js"), SOURCE);

let indexes = [8, 0, 7, 1, 6, 2, 5, 3, 4];

for pos in indexes {
let (expected_line, expected_col) = MAPPINGS[pos as usize];
let (line, col) = builder.search_original_line_and_column(pos);
assert_eq!((line, col), (expected_line, expected_col), "Mismatch at position {pos}");
}
}
}

0 comments on commit 78d2e83

Please sign in to comment.