Skip to content
This repository has been archived by the owner on Feb 12, 2018. It is now read-only.

Commit

Permalink
Add new caching strategy. Modest performance increase
Browse files Browse the repository at this point in the history
Cache regex matches per pattern, so that each regex only runs as many
times as it needs to.
  • Loading branch information
trishume committed Jun 29, 2016
1 parent 11ac7b4 commit 881f0e0
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 24 deletions.
8 changes: 8 additions & 0 deletions DESIGN.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,14 @@ $cargo run --example syncat testdata/jquery.js | grep leastmatch | wc -l
Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
Running `target/debug/examples/syncat testdata/jquery.js`
137842
# With search caching
$cargo run --example syncat testdata/jquery.js | grep searchcached | wc -l
Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
Running `target/debug/examples/syncat testdata/jquery.js`
2440527
$cargo run --example syncat testdata/jquery.js | grep regsearch | wc -l
Running `target/debug/examples/syncat testdata/jquery.js`
950195
```

Average unique regexes per line is 87.58, average non-unique is regsearch/lines = 317
Expand Down
4 changes: 2 additions & 2 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ Currently `syntect` is reasonably fast but not as fast as it could be. The follo

The current perf numbers are below. These numbers should get better once I implement more of the things above, but they're on par with many other text editors.

- Highlighting 9200 lines/247kb of jQuery 2.1 takes 1.4s, or ~6500 lines/second. For comparison:
- Textmate 2, Spacemacs and Visual Studio Code all take around the same time (2ish seconds)
- Highlighting 9200 lines/247kb of jQuery 2.1 takes 1.0s. For comparison:
- Textmate 2, Spacemacs and Visual Studio Code all take around 2 seconds (measured by hand with a stopwatch, hence approximate).
- Atom takes 6s
- Sublime Text 3 dev build takes ~0.22s, despite having a super fancy javascript syntax definition
- Vim is instantaneous but that isn't a fair comparison since vim's highlighting is far more basic than the other editors (Compare [vim's grammar](https://github.com/vim/vim/blob/master/runtime/syntax/javascript.vim) to [Sublime's](https://github.com/sublimehq/Packages/blob/master/JavaScript/JavaScript.sublime-syntax)).
Expand Down
1 change: 1 addition & 0 deletions src/highlighting/highlighter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ impl<'a, 'b> Iterator for HighlightIterator<'a, 'b> {
match command {
ScopeStackOp::Push(scope) => {
self.state.path.push(scope);
// println!("{}", self.state.path);
self.state
.styles
.push(style.apply(self.highlighter.get_style(self.state.path.as_slice())));
Expand Down
61 changes: 40 additions & 21 deletions src/parsing/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use super::syntax_definition::*;
use super::scope::*;
use onig::{self, Region};
use std::usize;
use std::collections::HashMap;
use std::i32;

/// Keeps the current parser state (the internal syntax interpreter stack) between lines of parsing.
Expand Down Expand Up @@ -40,8 +41,8 @@ struct RegexMatch {
pat_index: usize,
}

// TODO cache actual matching regions
type MatchCache = Vec<bool>;
/// Maps each pattern (keyed by pointer) to its cached search result: `Some(region)` holding the match regions, or `None` if the pattern was not found.
type SearchCache = HashMap<*const MatchPattern,Option<Region>>;

impl ParseState {
/// Create a state from a syntax, keeps its own reference counted
Expand Down Expand Up @@ -83,10 +84,10 @@ impl ParseState {
}

let mut regions = Region::with_capacity(8);
let mut match_cache: MatchCache = Vec::with_capacity(64); // TODO find best capacity
let mut search_cache: SearchCache = HashMap::with_capacity(128); // TODO find the best capacity
while self.parse_next_token(line,
&mut match_start,
&mut match_cache,
&mut search_cache,
&mut regions,
&mut res) {
}
Expand All @@ -96,7 +97,7 @@ impl ParseState {
fn parse_next_token(&mut self,
line: &str,
start: &mut usize,
cache: &mut MatchCache,
search_cache: &mut SearchCache,
regions: &mut Region,
ops: &mut Vec<(usize, ScopeStackOp)>)
-> bool {
Expand All @@ -114,19 +115,37 @@ impl ParseState {
.chain(prototype.into_iter())
.chain(Some(cur_level.context.clone()).into_iter());
// println!("{:#?}", cur_level);
let mut overall_index = 0;
// println!("token at {} on {}", start, line.trim_right());
for ctx in context_chain {
for (pat_context_ptr, pat_index) in context_iter(ctx) {
if overall_index < cache.len() && cache[overall_index] == false {
overall_index += 1;
continue; // we've determined this pattern doesn't match this line anywhere
}
let mut pat_context = pat_context_ptr.borrow_mut();
let mut match_pat = pat_context.match_at_mut(pat_index);

// println!("{} - {:?} - {:?}", match_pat.regex_str, match_pat.has_captures, cur_level.captures.is_some());

if let Some(maybe_region) = search_cache.get(&(match_pat as *const MatchPattern)) {
let mut valid_entry = true;
if let &Some(ref region) = maybe_region {
let match_start = region.pos(0).unwrap().0;
if match_start < *start {
valid_entry = false;
}
if match_start < min_start && valid_entry {
// print!("match {} at {} on {}", match_pat.regex_str, match_start, line);
min_start = match_start;
cur_match = Some(RegexMatch {
regions: region.clone(),
context: pat_context_ptr.clone(),
pat_index: pat_index,
});
}
}
if valid_entry {
continue;
}
}

match_pat.ensure_compiled_if_possible();
let refs_regex = if cur_level.captures.is_some() && match_pat.has_captures {
let refs_regex = if match_pat.has_captures && cur_level.captures.is_some() {
let &(ref region, ref s) = cur_level.captures.as_ref().unwrap();
Some(match_pat.compile_with_refs(region, s))
} else {
Expand All @@ -142,27 +161,30 @@ impl ParseState {
line.len(),
onig::SEARCH_OPTION_NONE,
Some(regions));
if overall_index >= cache.len() {
cache.push(matched.is_some());
} // TODO update the cache even if this is another time over
if let Some(match_start) = matched {
let match_end = regions.pos(0).unwrap().1;
// this is necessary to avoid infinite looping on dumb patterns
let does_something = match match_pat.operation {
MatchOperation::None => match_start != match_end,
_ => true,
};
if refs_regex.is_none() && does_something {
search_cache.insert(match_pat, Some(regions.clone()));
}
if match_start < min_start && does_something {
// print!("catch {} at {} on {}", match_pat.regex_str, match_start, line);
min_start = match_start;
cur_match = Some(RegexMatch {
regions: regions.clone(),
context: pat_context_ptr.clone(),
pat_index: pat_index,
});
}
} else {
if refs_regex.is_none() {
search_cache.insert(match_pat, None);
}
}

overall_index += 1;
}
}
cur_match
Expand All @@ -172,10 +194,7 @@ impl ParseState {
let (_, match_end) = reg_match.regions.pos(0).unwrap();
*start = match_end;
let level_context = self.stack[self.stack.len() - 1].context.clone();
let stack_changed = self.exec_pattern(line, reg_match, level_context, ops);
if stack_changed {
cache.clear();
}
self.exec_pattern(line, reg_match, level_context, ops);
true
} else {
false
Expand Down
2 changes: 1 addition & 1 deletion src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ pub fn as_24_bit_terminal_escaped(v: &[(Style, &str)], bg: bool) -> String {
/// with visual alignment to the line. Obviously for debugging.
pub fn debug_print_ops(line: &str, ops: &Vec<(usize, ScopeStackOp)>) {
for &(i, ref op) in ops.iter() {
println!("{}", line);
println!("{}", line.trim_right());
print!("{: <1$}", "", i);
match op {
&ScopeStackOp::Push(s) => {
Expand Down

0 comments on commit 881f0e0

Please sign in to comment.