From 8a212132f3450a01572c08dbfc7403072bbc85bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jalil=20David=20Salam=C3=A9=20Messina?= Date: Sat, 27 Apr 2024 11:58:25 +0200 Subject: [PATCH] feat: partial support for v2 syntax [#323] This adds support for the `extends` keyword. The implementation is very hacky right now: - We now require an optional `base_syntax` when parsing a syntax, if the `base_syntax` is not present and we find an `extends` keyword at the top level, we error out reporting which syntax file we expected. - SyntaxSet collects this error and resolves the syntaxes in a loop until no more syntaxes can be resolved. - We don't handle multiple-inheritance (`extends` might be a list). - We don't re-evaluate the `contexts` as defined in the spec (you can override variables and have that affect the context). - We only handle `extends` for syntaxes added through `SyntaxSet::load_from_folder`. --- src/parsing/syntax_definition.rs | 23 ++++++++- src/parsing/syntax_set.rs | 89 +++++++++++++++++++++++++++++++- src/parsing/yaml_load.rs | 89 +++++++++++++++++++++++++++++--- testdata/Packages | 2 +- 4 files changed, 191 insertions(+), 12 deletions(-) diff --git a/src/parsing/syntax_definition.rs b/src/parsing/syntax_definition.rs index d73c3e7..856c42e 100644 --- a/src/parsing/syntax_definition.rs +++ b/src/parsing/syntax_definition.rs @@ -51,7 +51,7 @@ pub struct Context { pub meta_scope: Vec, pub meta_content_scope: Vec, /// This being set false in the syntax file implies this field being set false, - /// but it can also be set falso for contexts that don't include the prototype for other reasons + /// but it can also be set false for contexts that don't include the prototype for other reasons pub meta_include_prototype: bool, pub clear_scopes: Option, /// This is filled in by the linker at link time @@ -75,6 +75,27 @@ impl Context { prototype: None, } } + + pub(crate) fn extend(&mut self, other: Context) { + let Context { + meta_scope, + meta_content_scope, + 
meta_include_prototype, + clear_scopes, + prototype, + uses_backrefs, + patterns, + } = other; + self.meta_scope.extend(meta_scope); + self.meta_content_scope.extend(meta_content_scope); + self.meta_include_prototype = meta_include_prototype; + self.clear_scopes = clear_scopes; + if self.prototype.is_none() || prototype.is_some() { + self.prototype = prototype; + } + self.uses_backrefs |= uses_backrefs; + self.patterns.extend(patterns); + } } #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] diff --git a/src/parsing/syntax_set.rs b/src/parsing/syntax_set.rs index 496a493..a1dccbb 100644 --- a/src/parsing/syntax_set.rs +++ b/src/parsing/syntax_set.rs @@ -13,6 +13,7 @@ use std::fs::File; use std::io::{self, BufRead, BufReader}; use std::mem; use std::path::Path; +use std::path::PathBuf; use super::regex::Regex; use crate::parsing::syntax_definition::ContextId; @@ -83,6 +84,7 @@ pub(crate) struct LazyContexts { pub struct SyntaxSetBuilder { syntaxes: Vec, path_syntaxes: Vec<(String, usize)>, + extends_syntaxes: Vec<(PathBuf, String)>, #[cfg(feature = "metadata")] raw_metadata: LoadMetadata, @@ -108,6 +110,23 @@ fn load_syntax_file( .map_err(|e| LoadingError::ParseSyntax(e, format!("{}", p.display()))) } +#[cfg(feature = "yaml-load")] +fn load_syntax_file_with_extends( + p: &Path, + base_syntax: &SyntaxDefinition, + lines_include_newline: bool, +) -> Result { + let s = std::fs::read_to_string(p)?; + + SyntaxDefinition::load_from_str_extended( + &s, + Some(base_syntax), + lines_include_newline, + p.file_stem().and_then(|x| x.to_str()), + ) + .map_err(|e| LoadingError::ParseSyntax(e, format!("{}", p.display()))) +} + impl Clone for SyntaxSet { fn clone(&self) -> SyntaxSet { SyntaxSet { @@ -375,6 +394,7 @@ impl SyntaxSet { SyntaxSetBuilder { syntaxes: builder_syntaxes, path_syntaxes, + extends_syntaxes: Vec::new(), #[cfg(feature = "metadata")] existing_metadata: Some(metadata), #[cfg(feature = "metadata")] @@ -516,6 +536,8 @@ impl SyntaxSetBuilder { folder: 
P, lines_include_newline: bool, ) -> Result<(), LoadingError> { + use super::ParseSyntaxError; + for entry in crate::utils::walk_dir(folder).sort_by(|a, b| a.file_name().cmp(b.file_name())) { let entry = entry.map_err(LoadingError::WalkDir)?; @@ -524,7 +546,27 @@ impl SyntaxSetBuilder { .extension() .map_or(false, |e| e == "sublime-syntax") { - let syntax = load_syntax_file(entry.path(), lines_include_newline)?; + let syntax = match load_syntax_file(entry.path(), lines_include_newline) { + Ok(syntax) => syntax, + // We are extending another syntax, look it up in the set first + Err(LoadingError::ParseSyntax( + ParseSyntaxError::ExtendsNotFound { name, extends }, + _, + )) => { + if let Some(ix) = self + .path_syntaxes + .iter() + .find(|(s, _)| s.ends_with(extends.as_str())) + .map(|(_, ix)| *ix) + { + todo!("lookup {ix} and pass to {name}"); + } + self.extends_syntaxes + .push((entry.path().to_path_buf(), extends)); + continue; + } + Err(err) => return Err(err), + }; if let Some(path_str) = entry.path().to_str() { // Split the path up and rejoin with slashes so that syntaxes loaded on Windows // can still be loaded the same way. 
@@ -550,6 +592,45 @@ impl SyntaxSetBuilder { Ok(()) } + fn resolve_extends(&mut self) { + let mut prev_len = usize::MAX; + // Loop while syntaxes are being resolved + while !self.extends_syntaxes.is_empty() && prev_len > self.extends_syntaxes.len() { + prev_len = self.extends_syntaxes.len(); + // Split borrows to make the borrow checker happy + let syntaxes = &mut self.syntaxes; + let paths = &mut self.path_syntaxes; + // Resolve syntaxes + self.extends_syntaxes.retain(|(path, extends)| { + let Some(ix) = paths + .iter() + .find(|(s, _)| s.ends_with(extends.as_str())) + .map(|(_, ix)| *ix) + else { + return true; + }; + let base_syntax = &syntaxes[ix]; + // FIXME: don't unwrap + let syntax = load_syntax_file_with_extends(path, base_syntax, false).unwrap(); + if let Some(path_str) = path.to_str() { + // Split the path up and rejoin with slashes so that syntaxes loaded on Windows + // can still be loaded the same way. + let path = Path::new(path_str); + let path_parts: Vec<_> = path.iter().map(|c| c.to_str().unwrap()).collect(); + paths.push((path_parts.join("/").to_string(), syntaxes.len())); + } + syntaxes.push(syntax); + false + }); + } + + if !self.extends_syntaxes.is_empty() { + dbg!(&self.path_syntaxes); + dbg!(&self.extends_syntaxes); + todo!("warn, unresolved syntaxes"); + } + } + /// Build a [`SyntaxSet`] from the syntaxes that have been added to this /// builder. /// @@ -571,16 +652,20 @@ impl SyntaxSetBuilder { /// directly load the [`SyntaxSet`]. 
/// /// [`SyntaxSet`]: struct.SyntaxSet.html - pub fn build(self) -> SyntaxSet { + pub fn build(mut self) -> SyntaxSet { + self.resolve_extends(); + #[cfg(not(feature = "metadata"))] let SyntaxSetBuilder { syntaxes: syntax_definitions, path_syntaxes, + extends_syntaxes: _, } = self; #[cfg(feature = "metadata")] let SyntaxSetBuilder { syntaxes: syntax_definitions, path_syntaxes, + extends_syntaxes: _, raw_metadata, existing_metadata, } = self; diff --git a/src/parsing/yaml_load.rs b/src/parsing/yaml_load.rs index 24f32a2..6a4733f 100644 --- a/src/parsing/yaml_load.rs +++ b/src/parsing/yaml_load.rs @@ -32,6 +32,9 @@ pub enum ParseSyntaxError { /// Syntaxes must have a context named "main" #[error("Context 'main' is missing")] MainMissing, + /// This syntax extends another syntax which is not available + #[error("Syntax for {name} extends {extends}, but {extends} could not be found")] + ExtendsNotFound { name: String, extends: String }, /// Some part of the YAML file is the wrong type (e.g a string but should be a list) /// Sorry this doesn't give you any way to narrow down where this is. /// Maybe use Sublime Text to figure it out. 
@@ -86,6 +89,15 @@ impl SyntaxDefinition { s: &str, lines_include_newline: bool, fallback_name: Option<&str>, + ) -> Result { + SyntaxDefinition::load_from_str_extended(s, None, lines_include_newline, fallback_name) + } + + pub(crate) fn load_from_str_extended( + s: &str, + extends: Option<&SyntaxDefinition>, + lines_include_newline: bool, + fallback_name: Option<&str>, ) -> Result { let docs = match YamlLoader::load_from_str(s) { Ok(x) => x, @@ -98,6 +110,7 @@ impl SyntaxDefinition { let mut scope_repo = SCOPE_REPO.lock().unwrap(); SyntaxDefinition::parse_top_level( doc, + extends, scope_repo.deref_mut(), lines_include_newline, fallback_name, @@ -106,13 +119,18 @@ impl SyntaxDefinition { fn parse_top_level( doc: &Yaml, + extends: Option<&SyntaxDefinition>, scope_repo: &mut ScopeRepository, lines_include_newline: bool, fallback_name: Option<&str>, ) -> Result { let h = doc.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?; - let mut variables = HashMap::new(); + // Get variables from cloned syntax, will be overwritten if the same is present as detailed + // in the spec + let mut variables = extends + .map(|syntax| syntax.variables.clone()) + .unwrap_or_default(); if let Ok(map) = get_key(h, "variables", |x| x.as_hash()) { for (key, value) in map.iter() { if let (Some(key_str), Some(val_str)) = (key.as_str(), value.as_str()) { @@ -120,6 +138,21 @@ impl SyntaxDefinition { } } } + + let name = get_key(h, "name", |x| x.as_str()) + .unwrap_or_else(|_| fallback_name.unwrap_or("Unnamed")) + .to_owned(); + // FIXME: extends is allowed to be a list also + let extends = match (get_key(h, "extends", |x| x.as_str()), extends) { + (Ok(base_syntax), None) => { + return Err(ParseSyntaxError::ExtendsNotFound { + name, + extends: base_syntax.to_string(), + }) + } + (Ok(_), Some(base_syntax)) => Some(base_syntax), + (Err(_), _) => None, + }; let contexts_hash = get_key(h, "contexts", |x| x.as_hash())?; let top_level_scope = scope_repo .build(get_key(h, "scope", |x| x.as_str())?) 
@@ -132,7 +165,11 @@ impl SyntaxDefinition { lines_include_newline, }; - let mut contexts = SyntaxDefinition::parse_contexts(contexts_hash, &mut state)?; + let mut contexts = SyntaxDefinition::parse_contexts( + contexts_hash, + extends.map(|syntax| &syntax.contexts), + &mut state, + )?; if !contexts.contains_key("main") { return Err(ParseSyntaxError::MainMissing); } @@ -147,9 +184,7 @@ impl SyntaxDefinition { } let defn = SyntaxDefinition { - name: get_key(h, "name", |x| x.as_str()) - .unwrap_or_else(|_| fallback_name.unwrap_or("Unnamed")) - .to_owned(), + name, scope: top_level_scope, file_extensions, // TODO maybe cache a compiled version of this Regex @@ -166,9 +201,11 @@ impl SyntaxDefinition { fn parse_contexts( map: &Hash, + extends: Option<&HashMap>, state: &mut ParserState<'_>, ) -> Result, ParseSyntaxError> { - let mut contexts = HashMap::new(); + // FIXME: contexts need to be re-evaluated with the new values of the variables + let mut contexts = extends.cloned().unwrap_or_default(); for (key, value) in map.iter() { if let (Some(name), Some(val_vec)) = (key.as_str(), value.as_vec()) { let is_prototype = name == "prototype"; @@ -194,13 +231,31 @@ impl SyntaxDefinition { is_prototype: bool, namer: &mut ContextNamer, ) -> Result { + enum InsertMode { + Replace, + Prepend, + Append, + } let mut context = Context::new(!is_prototype); let name = namer.next(); + let mut insert = InsertMode::Replace; for y in vec.iter() { let map = y.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?; let mut is_special = false; + if let Ok(x) = get_key(map, "meta_prepend", |x| x.as_bool()) { + if x { + insert = InsertMode::Prepend; + } + is_special = true; + } + if let Ok(x) = get_key(map, "meta_append", |x| x.as_bool()) { + if x { + insert = InsertMode::Append; + } + is_special = true; + } if let Ok(x) = get_key(map, "meta_scope", |x| x.as_str()) { context.meta_scope = str_to_scopes(x, state.scope_repo)?; is_special = true; @@ -237,7 +292,26 @@ impl SyntaxDefinition { } } - 
contexts.insert(name.clone(), context); + match insert { + InsertMode::Replace => { + contexts.insert(name.clone(), context); + } + InsertMode::Append => { + contexts + .entry(name.clone()) + .and_modify(|ctx| ctx.extend(context.clone())) + .or_insert(context); + } + InsertMode::Prepend => { + contexts + .entry(name.clone()) + .and_modify(|ctx| { + context.extend(ctx.clone()); + *ctx = context.clone(); + }) + .or_insert(context); + } + } Ok(name) } @@ -887,7 +961,6 @@ impl<'a> Parser<'a> { #[cfg(test)] mod tests { use super::*; - use crate::parsing::syntax_definition::*; use crate::parsing::Scope; #[test] diff --git a/testdata/Packages b/testdata/Packages index 483657a..40ec1f2 160000 --- a/testdata/Packages +++ b/testdata/Packages @@ -1 +1 @@ -Subproject commit 483657a3db466716505255c6380f57698354508b +Subproject commit 40ec1f2f9b56fb55d739e17ecd003cc9e8c9b096