Skip to content
This repository has been archived by the owner on Feb 12, 2018. It is now read-only.

Commit

Permalink
Fix parser bugs involving prototypes and variables
Browse files Browse the repository at this point in the history
This commit overhauls the way prototypes work to line up with how
Sublime Text does them:
- Prototypes are included in the same chain as with_prototype, meaning
that meta_include_prototype false means no prototypes even in includes.
- All contexts included recursively from the prototype context act like
they have meta_include_prototype false.

It also adds support for variables referencing other variables, which
is apparently a thing.
  • Loading branch information
trishume committed Jun 26, 2016
1 parent 28fb3bf commit b6081a3
Show file tree
Hide file tree
Showing 14 changed files with 242 additions and 44 deletions.
Binary file modified assets/default_newlines.packdump
Binary file not shown.
Binary file modified assets/default_nonewlines.packdump
Binary file not shown.
2 changes: 2 additions & 0 deletions examples/syncat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use std::io::BufRead;

fn main() {
let ss = SyntaxSet::load_defaults_nonewlines();
// use this format to load your own set of packages
// let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
let ts = ThemeSet::load_defaults();

let args: Vec<String> = std::env::args().collect();
Expand Down
3 changes: 3 additions & 0 deletions src/dumps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ impl SyntaxSet {
/// Instantiates a new syntax set from a binary dump of
/// Sublime Text's default open source syntax definitions and then links it.
/// These dumps are included in this library's binary for convenience.
///
/// This method loads the version for parsing line strings with no `\n` characters at the end.
/// If you're able to efficiently include newlines at the end of strings, use `load_defaults_newlines`
/// since it works better. See `SyntaxSet#load_syntaxes` for more info on this issue.
///
/// This is the recommended way of creating a syntax set for
/// non-advanced use cases. It is also significantly faster than loading the YAML files.
Expand Down
23 changes: 22 additions & 1 deletion src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,12 +251,33 @@ mod tests {
let s = include_str!("../testdata/highlight_test.erb");
let syntax = ss.find_syntax_by_extension("erb").unwrap();
let html = highlighted_snippet_for_string(s, syntax, &ts.themes["base16-ocean.dark"]);
println!("{}", html);
assert_eq!(html, include_str!("../testdata/test3.html"));
let html2 = highlighted_snippet_for_file("testdata/highlight_test.erb",
&ss,
&ts.themes["base16-ocean.dark"])
.unwrap();
assert_eq!(html2, html);

// YAML is a tricky syntax and InspiredGitHub is a fancy theme, this is basically an integration test
let html3 = highlighted_snippet_for_file("testdata/Packages/Rust/Cargo.sublime-syntax",
&ss,
&ts.themes["InspiredGitHub"])
.unwrap();
println!("{}", html3);
assert_eq!(html3, include_str!("../testdata/test4.html"));
}

#[test]
fn tricky_test_syntax() {
    // Runs a hand-written syntax that exercises prototype edge cases.
    // Per the original author's note, the expected HTML was verified against
    // what ST3 produces for the same syntax and file.
    let syntax_set = SyntaxSet::load_from_folder("testdata").unwrap();
    let theme_set = ThemeSet::load_defaults();
    let theme = &theme_set.themes["base16-ocean.dark"];

    let rendered =
        highlighted_snippet_for_file("testdata/testing-syntax.testsyntax", &syntax_set, theme)
            .unwrap();

    // Printed so a failing run shows the actual output next to the assertion.
    println!("{}", rendered);
    assert_eq!(rendered, include_str!("../testdata/test5.html"));
}
}
7 changes: 6 additions & 1 deletion src/parsing/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,14 @@ impl ParseState {
let cur_level = &self.stack[self.stack.len() - 1];
let mut min_start = usize::MAX;
let mut cur_match: Option<RegexMatch> = None;
let prototype: Option<ContextPtr> = {
let ctx_ref = cur_level.context.borrow();
ctx_ref.prototype.clone()
};
let context_chain = self.stack
.iter()
.filter_map(|lvl| lvl.prototype.as_ref().map(|x| x.clone()))
.chain(prototype.into_iter())
.chain(Some(cur_level.context.clone()).into_iter());
// println!("{:#?}", cur_level);
let mut overall_index = 0;
Expand Down Expand Up @@ -188,7 +193,7 @@ impl ParseState {
let context = reg_match.context.borrow();
let pat = context.match_at(reg_match.pat_index);
let level_context = level_context_ptr.borrow();
// println!("running pattern {:?}", pat);
// println!("running pattern {:?} on '{}' at {}", pat.regex_str, line, match_start);

self.push_meta_ops(true, match_start, &*level_context, &pat.operation, ops);
for s in pat.scope.iter() {
Expand Down
8 changes: 8 additions & 0 deletions src/parsing/syntax_definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ pub struct SyntaxDefinition {
pub scope: Scope,
pub first_line_match: Option<String>,
pub hidden: bool,
/// Filled in at link time to avoid serializing it multiple times
pub prototype: Option<ContextPtr>,

pub variables: HashMap<String, String>,
pub contexts: HashMap<String, ContextPtr>,
Expand All @@ -37,7 +39,13 @@ pub struct SyntaxDefinition {
pub struct Context {
pub meta_scope: Vec<Scope>,
pub meta_content_scope: Vec<Scope>,
/// This being set false in the syntax file implies this field being set false,
/// but it can also be set false for contexts that don't include the prototype for other reasons
pub meta_include_prototype: bool,
/// This is filled in by the linker at link time
/// for contexts that have `meta_include_prototype==true`
/// and are not included from the prototype.
pub prototype: Option<ContextPtr>,
pub uses_backrefs: bool,

pub patterns: Vec<Pattern>,
Expand Down
72 changes: 64 additions & 8 deletions src/parsing/syntax_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use super::super::LoadingError;

use std::path::Path;
use walkdir::WalkDir;
use std::io::{Read, self, BufRead, BufReader};
use std::io::{self, Read, BufRead, BufReader};
use std::fs::File;
use std::ops::DerefMut;
use std::mem;
Expand Down Expand Up @@ -165,7 +165,9 @@ impl SyntaxSet {
/// .unwrap_or_else(|| ss.find_syntax_plain_text());
/// assert_eq!(syntax.name, "HTML (Rails)");
/// ```
pub fn find_syntax_for_file<'a, P: AsRef<Path>>(&'a self, path_obj: P) -> io::Result<Option<&'a SyntaxDefinition>> {
pub fn find_syntax_for_file<'a, P: AsRef<Path>>(&'a self,
path_obj: P)
-> io::Result<Option<&'a SyntaxDefinition>> {
let path: &Path = path_obj.as_ref();
let extension = path.extension().and_then(|x| x.to_str()).unwrap_or("");
let ext_syntax = self.find_syntax_by_extension(extension);
Expand Down Expand Up @@ -208,16 +210,63 @@ impl SyntaxSet {
/// which is why it isn't done by default, except by the load_from_folder constructor.
/// This operation is idempotent, but takes time even on already linked syntax sets.
pub fn link_syntaxes(&mut self) {
// 2 loops necessary to satisfy borrow checker :-(
for syntax in self.syntaxes.iter_mut() {
if let Some(ref proto_ptr) = syntax.contexts.get("prototype") {
let mut mut_ref = proto_ptr.borrow_mut();
Self::recursively_mark_no_prototype(syntax, mut_ref.deref_mut());
syntax.prototype = Some((*proto_ptr).clone());
}
}
for syntax in self.syntaxes.iter() {
for (_, ref context_ptr) in syntax.contexts.iter() {
for (_, context_ptr) in syntax.contexts.iter() {
let mut mut_ref = context_ptr.borrow_mut();
self.link_context(syntax, mut_ref.deref_mut());
}
}
self.is_linked = true;
}

/// Anything recursively included by the prototype shouldn't include the prototype.
/// This marks them as such.
fn recursively_mark_no_prototype(syntax: &SyntaxDefinition, context: &mut Context) {
context.meta_include_prototype = false;
for pattern in context.patterns.iter_mut() {
match *pattern {
/// Apparently inline blocks also don't include the prototype when within the prototype.
/// This is really weird, but necessary to run the YAML syntax.
Pattern::Match(ref mut match_pat) => {
let maybe_context_refs = match match_pat.operation {
MatchOperation::Push(ref context_refs) => Some(context_refs),
MatchOperation::Set(ref context_refs) => Some(context_refs),
MatchOperation::Pop | MatchOperation::None => None,
};
if let Some(context_refs) = maybe_context_refs {
for context_ref in context_refs.iter() {
if let &ContextReference::Inline(ref context_ptr) = context_ref {
let mut mut_ref = context_ptr.borrow_mut();
Self::recursively_mark_no_prototype(syntax, mut_ref.deref_mut());
}
}
}
}
Pattern::Include(ContextReference::Named(ref s)) => {
if let Some(context_ptr) = syntax.contexts.get(s) {
let mut mut_ref = context_ptr.borrow_mut();
Self::recursively_mark_no_prototype(syntax, mut_ref.deref_mut());
}
}
_ => (),
}
}
}

fn link_context(&self, syntax: &SyntaxDefinition, context: &mut Context) {
if context.meta_include_prototype == true {
if let Some(ref proto_ptr) = syntax.prototype {
context.prototype = Some((*proto_ptr).clone());
}
}
for pattern in context.patterns.iter_mut() {
match *pattern {
Pattern::Match(ref mut match_pat) => self.link_match_pat(syntax, match_pat),
Expand Down Expand Up @@ -297,7 +346,7 @@ impl FirstLineCache {
for (i, syntax) in syntaxes[self.cached_until..].iter().enumerate() {
if let Some(ref reg_str) = syntax.first_line_match {
if let Ok(reg) = Regex::new(reg_str) {
self.regexes.push((reg,i));
self.regexes.push((reg, i));
}
}
}
Expand Down Expand Up @@ -337,16 +386,23 @@ mod tests {
#[test]
fn can_load() {
let mut ps = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
assert_eq!(&ps.find_syntax_by_first_line("#!/usr/bin/env node").unwrap().name, "JavaScript");
assert_eq!(&ps.find_syntax_by_first_line("#!/usr/bin/env node").unwrap().name,
"JavaScript");
ps.load_plain_text_syntax();
let rails_scope = Scope::new("source.ruby.rails").unwrap();
let syntax = ps.find_syntax_by_name("Ruby on Rails").unwrap();
ps.find_syntax_plain_text();
assert_eq!(&ps.find_syntax_by_extension("rake").unwrap().name, "Ruby");
assert_eq!(&ps.find_syntax_by_token("ruby").unwrap().name, "Ruby");
assert_eq!(&ps.find_syntax_by_first_line("lol -*- Mode: C -*- such line").unwrap().name, "C");
assert_eq!(&ps.find_syntax_for_file("testdata/parser.rs").unwrap().unwrap().name, "Rust");
assert_eq!(&ps.find_syntax_for_file("testdata/test_first_line.test").unwrap().unwrap().name, "Go");
assert_eq!(&ps.find_syntax_by_first_line("lol -*- Mode: C -*- such line").unwrap().name,
"C");
assert_eq!(&ps.find_syntax_for_file("testdata/parser.rs").unwrap().unwrap().name,
"Rust");
assert_eq!(&ps.find_syntax_for_file("testdata/test_first_line.test")
.unwrap()
.unwrap()
.name,
"Go");
assert!(&ps.find_syntax_by_first_line("derp derp hi lol").is_none());
// println!("{:#?}", syntax);
assert_eq!(syntax.scope, rails_scope);
Expand Down
57 changes: 25 additions & 32 deletions src/parsing/yaml_load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ fn str_to_scopes(s: &str, repo: &mut ScopeRepository) -> Result<Vec<Scope>, Pars
struct ParserState<'a> {
scope_repo: &'a mut ScopeRepository,
variables: HashMap<String, String>,
has_prototype: bool,
variable_regex: Regex,
backref_regex: Regex,
short_multibyte_regex: Regex,
Expand Down Expand Up @@ -94,7 +93,6 @@ impl SyntaxDefinition {
let mut state = ParserState {
scope_repo: scope_repo,
variables: variables,
has_prototype: contexts_hash.contains_key(&Yaml::String(String::from("prototype"))),
variable_regex: Regex::new(r"\{\{([A-Za-z0-9_]+)\}\}").unwrap(),
backref_regex: Regex::new(r"\\\d").unwrap(),
short_multibyte_regex: Regex::new(r"\\x([a-fA-F][a-fA-F0-9])").unwrap(),
Expand Down Expand Up @@ -123,6 +121,7 @@ impl SyntaxDefinition {

variables: state.variables.clone(),
contexts: contexts,
prototype: None,
};
Ok(defn)
}
Expand Down Expand Up @@ -155,33 +154,34 @@ impl SyntaxDefinition {
let mut context = Context {
meta_scope: Vec::new(),
meta_content_scope: Vec::new(),
meta_include_prototype: true,
meta_include_prototype: !is_prototype,
uses_backrefs: false,
patterns: Vec::new(),
prototype: None,
};
let mut seen_pattern = false;
for y in vec.iter() {
let map = try!(y.as_hash().ok_or(ParseSyntaxError::TypeMismatch));

let mut is_special = false;
if let Some(x) = get_key(map, "meta_scope", |x| x.as_str()).ok() {
context.meta_scope = try!(str_to_scopes(x, state.scope_repo));
} else if let Some(x) = get_key(map, "meta_content_scope", |x| x.as_str()).ok() {
is_special = true;
}
if let Some(x) = get_key(map, "meta_content_scope", |x| x.as_str()).ok() {
context.meta_content_scope = try!(str_to_scopes(x, state.scope_repo));
} else if let Some(x) = get_key(map, "meta_include_prototype", |x| x.as_bool()).ok() {
is_special = true;
}
if let Some(x) = get_key(map, "meta_include_prototype", |x| x.as_bool()).ok() {
context.meta_include_prototype = x;
} else {
if !seen_pattern && context.meta_include_prototype && state.has_prototype &&
!is_prototype {
seen_pattern = true;
context.patterns
.push(Pattern::Include(ContextReference::Named(String::from("prototype"))));
}
is_special = true;
}
if !is_special {
if let Some(x) = get_key(map, "include", |x| Some(x)).ok() {
let reference = try!(SyntaxDefinition::parse_reference(x, state));
context.patterns.push(Pattern::Include(reference));
} else {
let pattern = try!(SyntaxDefinition::parse_match_pattern(map, state));
if pattern.regex.is_none() {
if pattern.has_captures {
context.uses_backrefs = true;
}
context.patterns.push(Pattern::Match(pattern));
Expand Down Expand Up @@ -229,13 +229,19 @@ impl SyntaxDefinition {
}
}

/// Expands every `{{name}}` reference in `raw_regex` using `state.variables`.
///
/// Variable bodies may themselves reference other variables; those references are
/// expanded recursively. A reference to an undefined variable expands to the empty
/// string. A variable that references itself (directly or through a cycle) also
/// expands to the empty string at the point where the cycle closes, instead of
/// recursing until the stack overflows.
fn resolve_variables(raw_regex: &str, state: &ParserState) -> String {
    let mut in_progress: Vec<String> = Vec::new();
    Self::resolve_variables_guarded(raw_regex, state, &mut in_progress)
}

/// Worker for `resolve_variables`; `in_progress` holds the names of the variables
/// whose bodies are currently being expanded, outermost first.
fn resolve_variables_guarded(raw_regex: &str,
                             state: &ParserState,
                             in_progress: &mut Vec<String>)
                             -> String {
    state.variable_regex.replace_all(raw_regex, |caps: &Captures| {
        let name = caps.at(1).unwrap_or("");
        // A name that is already being expanded would recurse without end.
        if in_progress.iter().any(|active| active.as_str() == name) {
            return String::new();
        }
        match state.variables.get(name) {
            Some(body) => {
                in_progress.push(name.to_owned());
                let resolved = Self::resolve_variables_guarded(body, state, in_progress);
                in_progress.pop();
                resolved
            }
            // Unknown variables contribute nothing to the regex.
            None => String::new(),
        }
    })
}

fn parse_match_pattern(map: &BTreeMap<Yaml, Yaml>,
state: &mut ParserState)
-> Result<MatchPattern, ParseSyntaxError> {
let raw_regex = try!(get_key(map, "match", |x| x.as_str()));
let regex_str_1 = state.variable_regex.replace_all(raw_regex, |caps: &Captures| {
state.variables.get(caps.at(1).unwrap_or("")).map(|x| &**x).unwrap_or("").to_owned()
});
let regex_str_1 = Self::resolve_variables(raw_regex, state);
// bug triggered by CSS.sublime-syntax, dunno why this is necessary
let regex_str_2 =
state.short_multibyte_regex.replace_all(&regex_str_1, |caps: &Captures| {
Expand All @@ -250,6 +256,7 @@ impl SyntaxDefinition {
.replace("(?:\\n)?","") // fails with invalid operand of repeat expression
.replace("(?<!\\n)","") // fails with invalid pattern in look-behind
.replace("(?<=\\n)","") // fails with invalid pattern in look-behind
.replace(" :\\s"," :(\\s|\\z)") // hack specific to YAML.sublime-syntax
.replace("\\n","\\z")
};
// println!("{:?}", regex_str);
Expand Down Expand Up @@ -385,21 +392,7 @@ mod tests {
assert_eq!(defn2.contexts["main"].borrow().meta_include_prototype, true);
assert_eq!(defn2.contexts["string"].borrow().meta_scope,
vec![Scope::new("string.quoted.double.c").unwrap()]);
{
let proto_pattern: &Pattern = &defn2.contexts["main"].borrow().patterns[0];
match proto_pattern {
&Pattern::Include(ContextReference::Named(_)) => (),
_ => assert!(false, "Prototype should be included"),
}
let not_proto_pattern: &Pattern = &defn2.contexts["string"].borrow().patterns[0];
match not_proto_pattern {
&Pattern::Include(ContextReference::Named(_)) => {
assert!(false, "Prototype shouldn't be included")
}
_ => (),
}
}
let first_pattern: &Pattern = &defn2.contexts["main"].borrow().patterns[1];
let first_pattern: &Pattern = &defn2.contexts["main"].borrow().patterns[0];
match first_pattern {
&Pattern::Match(ref match_pat) => {
let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed");
Expand Down
Loading

0 comments on commit b6081a3

Please sign in to comment.