Add benchmarks

cobalt-org · Jun 8, 2016 · 729296d · 729296d
1 parent e446f16
commit 729296d
Show file tree

Hide file tree

Showing 4 changed files with 76 additions and 6 deletions.
diff --git a/Readme.md b/Readme.md
@@ -11,6 +11,7 @@ It is currently mostly complete and can parse, interpret and highlight based on
 - Work with many languages (accomplished through using existing grammar formats)
 - Be super fast
 - API that is both easy to use, and allows use in fancy text editors with piece tables and incremental re-highlighting and the like
+- Expose internals of the parsing process so text editors can do things like cache parse states and use semantic info for code intelligence
 - High quality highlighting, supporting things like heredocs and complex syntaxes (like Rust's).
 
 ## Screenshots
@@ -35,6 +36,21 @@ There's currently an example program called `syncat` that prints one of the sour
 - [ ] Make it really fast (mostly two hot-paths need caching, same places Textmate 2 caches)
 - [ ] Add C bindings so it can be used as a C library from other languages.
 
+## Performance
+
+Currently `syntect` is reasonably fast but not as fast as it could be. The following perf features are done and to-be-done:
+- [x] Pre-link references between languages (e.g. `<script>` tags) so there are no tree traversal string lookups in the hot-path
+- [x] Compact binary representation of scopes to allow quickly passing and copying them around
+- [x] Determine if a scope is a prefix of another scope using bit manipulation in only a few instructions
+- [ ] Cache regex matches to reduce number of times oniguruma is asked to search a line
+- [ ] Cache scope lookups to reduce how much scope matching has to be done to highlight a list of scope operations
+- [ ] Lazily compile regexes so startup time isn't taken compiling a thousand regexes for ActionScript that nobody will use
+
+The current perf numbers are below. These numbers should get vastly better once I implement more of the things above, but they may be sufficient for some use cases.
+- ~220ms to load and link all the syntax definitions in the default Sublime package set. This is ~60% regex compilation and ~35% YAML parsing.
+- ~3.3ms to parse and highlight the 30-line, 791-character `testdata/highlight_test.erb` file. This works out to around 9000 lines/second or 239 kilobytes/second.
+- ~250ms end to end for `syncat` to start, load the definitions, highlight the test file and shut down. This is mostly spent loading.
+
 ## License and Acknowledgements
 
 Thanks to [Textmate 2](https://github.com/textmate/textmate) and @defuz's [sublimate](https://github.com/defuz/sublimate) for the existing open source code I used as inspiration and in the case of sublimate's `tmTheme` loader, copy-pasted. All code (including defuz's sublimate code) is released under the MIT license.
diff --git a/benches/highlighting.rs b/benches/highlighting.rs
@@ -0,0 +1,34 @@
+#![feature(test)]
+
+extern crate test;
+extern crate syntect;
+use test::Bencher;
+
+use syntect::package_set::PackageSet;
+use syntect::parser::*;
+use syntect::theme::highlighter::*;
+use syntect::theme::style::*;
+use std::fs::File;
+use std::io::Read;
+
+/// Benchmarks parsing and highlighting `testdata/highlight_test.erb` line by line.
+///
+/// The package set, theme and file contents are loaded once up front; only
+/// the per-line parse + highlight work runs inside the timed closure.
+#[bench]
+fn bench_highlighting(b: &mut Bencher) {
+    let packages = PackageSet::load_from_folder("testdata/Packages").unwrap();
+    let theme = PackageSet::get_theme("testdata/spacegray/base16-ocean.\
+                                                              dark.tmTheme")
+        .unwrap();
+    let highlighter = Highlighter::new(theme);
+
+    // Slurp the sample into memory so file I/O is not part of the measurement.
+    let mut source = String::new();
+    File::open("testdata/highlight_test.erb")
+        .unwrap()
+        .read_to_string(&mut source)
+        .unwrap();
+
+    let syntax = packages.find_syntax_by_extension("erb").unwrap();
+
+    b.iter(|| {
+        // Fresh parser and highlighter state for every timed run.
+        let mut parse_state = ParseState::new(syntax);
+        let initial_scopes = parse_state.scope_stack.clone();
+        let mut highlight_state = HighlightState::new(&highlighter, initial_scopes);
+
+        for line in source.lines() {
+            let ops = parse_state.parse_line(&line);
+            let regions: Vec<(Style, &str)> =
+                HighlightIterator::new(&mut highlight_state, &ops[..], &line, &highlighter)
+                    .collect();
+            // Keep the optimizer from discarding the highlighted regions.
+            test::black_box(&regions);
+        }
+    });
+}
diff --git a/benches/loading.rs b/benches/loading.rs
@@ -0,0 +1,15 @@
+#![feature(test)]
+
+extern crate test;
+extern crate syntect;
+use test::Bencher;
+
+use syntect::package_set::PackageSet;
+
+/// Benchmarks creating an empty `PackageSet` and loading the syntax
+/// definitions found under `testdata/Packages` into it.
+///
+/// The second argument to `load_syntaxes` is passed as `false`; presumably it
+/// is the `lines_include_newline` flag -- TODO confirm against `PackageSet`.
+#[bench]
+fn bench_load_syntaxes(b: &mut Bencher) {
+    b.iter(|| {
+        let mut ps = PackageSet::new();
+        ps.load_syntaxes("testdata/Packages", false).unwrap();
+        // Return the loaded set so the bench harness consumes it; otherwise
+        // the result is dropped unobserved and the optimizer is free to
+        // treat the loading work as dead.
+        ps
+    });
+}
diff --git a/src/yaml_load.rs b/src/yaml_load.rs
@@ -47,7 +47,9 @@ struct ParserState<'a> {
 }
 
 impl SyntaxDefinition {
-    pub fn load_from_str(s: &str, lines_include_newline: bool) -> Result<SyntaxDefinition, ParseSyntaxError> {
+    pub fn load_from_str(s: &str,
+                         lines_include_newline: bool)
+                         -> Result<SyntaxDefinition, ParseSyntaxError> {
         let docs = match YamlLoader::load_from_str(s) {
             Ok(x) => x,
             Err(e) => return Err(ParseSyntaxError::InvalidYaml(e)),
@@ -224,9 +226,10 @@ impl SyntaxDefinition {
             state.variables.get(caps.at(1).unwrap_or("")).map(|x| &**x).unwrap_or("").to_owned()
         });
         // bug triggered by CSS.sublime-syntax, dunno why this is necessary
-        let regex_str_2 = state.short_multibyte_regex.replace_all(&regex_str_1, |caps: &Captures| {
-            format!("\\x{{000000{}}}", caps.at(1).unwrap_or(""))
-        });
+        let regex_str_2 =
+            state.short_multibyte_regex.replace_all(&regex_str_1, |caps: &Captures| {
+                format!("\\x{{000000{}}}", caps.at(1).unwrap_or(""))
+            });
         // if the passed in strings don't include newlines (unlike Sublime) we can't match on them
         let regex_str = if state.lines_include_newline {
             regex_str_2
@@ -320,7 +323,8 @@ mod tests {
         use syntax_definition::*;
         use scope::*;
         let defn: SyntaxDefinition =
-            SyntaxDefinition::load_from_str("name: C\nscope: source.c\ncontexts: {main: []}")
+            SyntaxDefinition::load_from_str("name: C\nscope: source.c\ncontexts: {main: []}",
+                                            false)
                 .unwrap();
         assert_eq!(defn.name, "C");
         assert_eq!(defn.scope, Scope::new("source.c").unwrap());
@@ -359,7 +363,8 @@ mod tests {
               scope: constant.character.escape.c
             - match: '\"'
               pop: true
-        ")
+        ",
+                                            false)
                 .unwrap();
         assert_eq!(defn2.name, "C");
         assert_eq!(defn2.scope, Scope::new("source.c").unwrap());