Skip to content
This repository has been archived by the owner on Feb 12, 2018. It is now read-only.

Commit

Permalink
Serialize/Deserialize traits and lazy regex compilation
Browse files Browse the repository at this point in the history
  • Loading branch information
trishume committed Jun 10, 2016
1 parent 435903c commit 3f08862
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 24 deletions.
2 changes: 1 addition & 1 deletion src/package_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ impl PackageSet {
Direct(_) => None,
};
if let Some(new_context) = maybe_new_context {
let mut new_ref = Direct(Rc::downgrade(&new_context));
let mut new_ref = Direct(LinkerLink {link: Rc::downgrade(&new_context)});
mem::swap(context_ref, &mut new_ref);
}
}
Expand Down
7 changes: 4 additions & 3 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,12 @@ impl ParseState {
overall_index += 1;
continue; // we've determined this pattern doesn't match this line anywhere
}
let pat_context = pat_context_ptr.borrow();
let match_pat = pat_context.match_at(pat_index);
let mut pat_context = pat_context_ptr.borrow_mut();
let mut match_pat = pat_context.match_at_mut(pat_index);

// println!("{:?}", match_pat.regex_str);
let refs_regex = if cur_level.captures.is_some() && match_pat.regex.is_none() {
match_pat.ensure_compiled_if_possible();
let refs_regex = if cur_level.captures.is_some() && match_pat.has_captures {
let &(ref region, ref s) = cur_level.captures.as_ref().unwrap();
Some(match_pat.compile_with_refs(region, s))
} else {
Expand Down
30 changes: 25 additions & 5 deletions src/scope.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::sync::Mutex;
use std::fmt;
use std::str::FromStr;
use std::u64;
use rustc_serialize::{Encodable, Encoder, Decodable, Decoder};

lazy_static! {
pub static ref SCOPE_REPO: Mutex<ScopeRepository> = Mutex::new(ScopeRepository::new());
Expand Down Expand Up @@ -129,7 +130,7 @@ impl Scope {
(shifted & 0xFFFF) as u16
}

#[inline(always)]
#[inline]
fn missing_atoms(self) -> u32 {
let trail = if self.b == 0 {
self.a.trailing_zeros() + 64
Expand All @@ -145,6 +146,13 @@ impl Scope {
8 - self.missing_atoms()
}

/// Renders this scope as its textual form.
///
/// The lookup goes through the global `SCOPE_REPO`, whose mutex is held for
/// the duration of the call, so this should not be called frequently.
/// Panics if the mutex has been poisoned.
fn build_string(self) -> String {
    SCOPE_REPO.lock().unwrap().to_string(self)
}

/// Tests if this scope is a prefix of another scope.
/// Note that the empty scope is always a prefix.
///
Expand Down Expand Up @@ -202,20 +210,32 @@ impl FromStr for Scope {

impl fmt::Display for Scope {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let repo = SCOPE_REPO.lock().unwrap();
let s = repo.to_string(*self);
let s = self.build_string();
write!(f, "{}", s)
}
}

impl fmt::Debug for Scope {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let repo = SCOPE_REPO.lock().unwrap();
let s = repo.to_string(*self);
let s = self.build_string();
write!(f, "<{}>", s)
}
}

/// A scope is serialized as its string form (see `build_string`).
impl Encodable for Scope {
    fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
        s.emit_str(&self.build_string())
    }
}

/// A scope is deserialized from its string form via `Scope::new`.
impl Decodable for Scope {
    /// Reads a string from the decoder and parses it into a `Scope`.
    ///
    /// A string that `Scope::new` rejects is reported as a decoder error
    /// rather than a panic, so malformed input cannot crash the caller.
    fn decode<D: Decoder>(d: &mut D) -> Result<Scope, D::Error> {
        let s: String = try!(d.read_str());
        match Scope::new(&s) {
            Ok(scope) => Ok(scope),
            Err(_) => Err(d.error(&format!("invalid scope string: {:?}", s))),
        }
    }
}

impl ScopeStack {
pub fn new() -> ScopeStack {
ScopeStack { scopes: Vec::new() }
Expand Down
99 changes: 89 additions & 10 deletions src/syntax_definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,24 @@ use std::rc::{Rc, Weak};
use std::cell::RefCell;
use scope::*;
use regex_syntax::quote;
use rustc_serialize::{Encodable, Encoder, Decodable, Decoder};

pub type CaptureMapping = HashMap<usize, Vec<Scope>>;
pub type ContextPtr = Rc<RefCell<Context>>;

#[derive(Debug)]
#[derive(Debug, RustcEncodable, RustcDecodable)]
pub struct SyntaxDefinition {
pub name: String,
pub file_extensions: Vec<String>,
pub scope: Scope,
pub first_line_match: Option<Regex>,
pub first_line_match: Option<String>,
pub hidden: bool,

pub variables: HashMap<String, String>,
pub contexts: HashMap<String, ContextPtr>,
}

#[derive(Debug)]
#[derive(Debug, RustcEncodable, RustcDecodable)]
pub struct Context {
pub meta_scope: Vec<Scope>,
pub meta_content_scope: Vec<Scope>,
Expand All @@ -30,22 +31,22 @@ pub struct Context {
pub patterns: Vec<Pattern>,
}

#[derive(Debug)]
#[derive(Debug, RustcEncodable, RustcDecodable)]
pub enum Pattern {
Match(MatchPattern),
Include(ContextReference),
}

#[derive(Debug)]
#[derive(Debug, RustcEncodable, RustcDecodable)]
pub struct MatchIter {
ctx_stack: Vec<ContextPtr>,
index_stack: Vec<usize>,
}

#[derive(Debug)]
pub struct MatchPattern {
pub has_captures: bool,
pub regex_str: String,
// present unless contains backrefs and has to be dynamically compiled
pub regex: Option<Regex>,
pub scope: Vec<Scope>,
pub captures: Option<CaptureMapping>,
Expand All @@ -54,6 +55,11 @@ pub struct MatchPattern {
}

/// Wrapper around a weak pointer to a `Context`, used as the payload of
/// `ContextReference::Direct`.
/// It has hand-written `Encodable`/`Decodable` impls (which panic) instead of
/// derived ones.
#[derive(Debug)]
pub struct LinkerLink {
/// Weak reference to the target context; callers `upgrade()` it to get a
/// `ContextPtr`.
pub link: Weak<RefCell<Context>>,
}

#[derive(Debug, RustcEncodable, RustcDecodable)]
pub enum ContextReference {
Named(String),
ByScope {
Expand All @@ -65,10 +71,10 @@ pub enum ContextReference {
sub_context: Option<String>,
},
Inline(ContextPtr),
Direct(Weak<RefCell<Context>>),
Direct(LinkerLink),
}

#[derive(Debug)]
#[derive(Debug, RustcEncodable, RustcDecodable)]
pub enum MatchOperation {
Push(Vec<ContextReference>),
Set(Vec<ContextReference>),
Expand All @@ -95,7 +101,7 @@ impl Iterator for MatchIter {
Pattern::Include(ref ctx_ref) => {
let ctx_ptr = match ctx_ref {
&ContextReference::Inline(ref ctx_ptr) => ctx_ptr.clone(),
&ContextReference::Direct(ref ctx_ptr) => ctx_ptr.upgrade().unwrap(),
&ContextReference::Direct(ref ctx_ptr) => ctx_ptr.link.upgrade().unwrap(),
_ => panic!("Can only iterate patterns after linking: {:?}", ctx_ref),
};
self.ctx_stack.push(ctx_ptr);
Expand Down Expand Up @@ -127,14 +133,21 @@ impl Context {
_ => panic!("bad index to match_at"),
}
}

/// Returns a mutable reference to the `MatchPattern` stored at `index` in
/// this context's `patterns`.
///
/// # Panics
/// Panics if `index` is out of bounds, or if the pattern at `index` is not a
/// `Pattern::Match`.
pub fn match_at_mut(&mut self, index: usize) -> &mut MatchPattern {
    match self.patterns[index] {
        Pattern::Match(ref mut match_pat) => match_pat,
        // Name this method in the message so the panic points at the right caller.
        _ => panic!("bad index to match_at_mut"),
    }
}
}

impl ContextReference {
/// find the pointed to context, panics if ref is not linked
pub fn resolve(&self) -> ContextPtr {
match self {
&ContextReference::Inline(ref ptr) => ptr.clone(),
&ContextReference::Direct(ref ptr) => ptr.upgrade().unwrap(),
&ContextReference::Direct(ref ptr) => ptr.link.upgrade().unwrap(),
_ => panic!("Can only call resolve on linked references: {:?}", self),
}
}
Expand Down Expand Up @@ -167,13 +180,78 @@ impl MatchPattern {
reg_str
}

/// Builds a fresh regex for a pattern that refers to regions captured by
/// another match: the references are substituted using `region` and `s`
/// (see `regex_with_substitutes`) and the result is compiled.
///
/// Panics if the substituted regex fails to compile.
pub fn compile_with_refs(&self, region: &Region, s: &str) -> Regex {
    // TODO don't panic on invalid regex
    let substituted = self.regex_with_substitutes(region, s);
    let compiled = Regex::with_options(&substituted,
                                       onig::REGEX_OPTION_CAPTURE_GROUP,
                                       Syntax::default());
    compiled.unwrap()
}

/// Compiles `regex_str` and stores the result in `regex`.
///
/// # Panics
/// Panics if `regex_str` is not a valid regex. The panic message includes
/// the offending regex source and the compile error so the broken pattern
/// can be located.
fn compile_regex(&mut self) {
    let compiled = match Regex::with_options(&self.regex_str,
                                             onig::REGEX_OPTION_CAPTURE_GROUP,
                                             Syntax::default()) {
        Ok(regex) => regex,
        Err(err) => panic!("failed to compile regex {:?}: {:?}", self.regex_str, err),
    };
    self.regex = Some(compiled);
}

/// Compiles and caches the regex if that can be done ahead of time.
///
/// Nothing happens when a compiled regex is already stored or when
/// `has_captures` is set; otherwise `compile_regex` is called, which panics
/// if compilation fails.
#[inline]
pub fn ensure_compiled_if_possible(&mut self) {
    if self.has_captures || self.regex.is_some() {
        return;
    }
    self.compile_regex();
}
}

/// Serializes every field except the compiled `regex`.
/// Only valid on a syntax which hasn't been linked up to other syntaxes yet.
impl Encodable for MatchPattern {
    fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
        s.emit_struct("MatchPattern", 6, |enc| {
            try!(enc.emit_struct_field("has_captures", 0, |e| self.has_captures.encode(e)));
            try!(enc.emit_struct_field("regex_str", 1, |e| self.regex_str.encode(e)));
            try!(enc.emit_struct_field("scope", 2, |e| self.scope.encode(e)));
            try!(enc.emit_struct_field("captures", 3, |e| self.captures.encode(e)));
            try!(enc.emit_struct_field("operation", 4, |e| self.operation.encode(e)));
            // The last field's result is the result of the whole struct.
            enc.emit_struct_field("with_prototype", 5, |e| self.with_prototype.encode(e))
        })
    }
}

/// Reads back the fields written by the `Encodable` impl.
/// The compiled `regex` is never stored, so decoded patterns start with
/// `regex: None`.
impl Decodable for MatchPattern {
    fn decode<D: Decoder>(d: &mut D) -> Result<Self, D::Error> {
        d.read_struct("MatchPattern", 6, |dec| {
            // Fields are read in the same order in which they were written.
            let has_captures = try!(dec.read_struct_field("has_captures", 0, Decodable::decode));
            let regex_str = try!(dec.read_struct_field("regex_str", 1, Decodable::decode));
            let scope = try!(dec.read_struct_field("scope", 2, Decodable::decode));
            let captures = try!(dec.read_struct_field("captures", 3, Decodable::decode));
            let operation = try!(dec.read_struct_field("operation", 4, Decodable::decode));
            let with_prototype = try!(dec.read_struct_field("with_prototype", 5, Decodable::decode));

            Ok(MatchPattern {
                has_captures: has_captures,
                regex_str: regex_str,
                regex: None,
                scope: scope,
                captures: captures,
                operation: operation,
                with_prototype: with_prototype,
            })
        })
    }
}

/// Just panics, we can't do anything with linked up syntaxes
impl Encodable for LinkerLink {
fn encode<S: Encoder>(&self, _: &mut S) -> Result<(), S::Error> {
panic!("Can't encode syntax definitions which have been linked")
}
}

/// Just panics, we can't do anything with linked up syntaxes
impl Decodable for LinkerLink {
fn decode<D: Decoder>(_: &mut D) -> Result<LinkerLink, D::Error> {
panic!("No linked syntax should ever have gotten encoded")
}
}

#[cfg(test)]
Expand All @@ -183,6 +261,7 @@ mod tests {
fn can_compile_refs() {
use onig::{self, Regex, Region};
let pat = MatchPattern {
has_captures: true,
regex_str: String::from(r"lol \\ \2 \1 '\9' \wz"),
regex: None,
scope: vec![],
Expand Down
7 changes: 2 additions & 5 deletions src/yaml_load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,7 @@ impl SyntaxDefinition {
.map(|v| v.iter().filter_map(|y| y.as_str()).map(|x| x.to_owned()).collect())
.unwrap_or_else(|_| Vec::new())
},
first_line_match: if let Ok(s) = get_key(h, "first_line_match", |x| x.as_str()) {
Some(try!(Regex::new(s).map_err(|e| ParseSyntaxError::RegexCompileError(e))))
} else {
None
},
first_line_match: get_key(h, "first_line_match", |x| x.as_str()).ok().map(|s| s.to_owned()),
hidden: get_key(h, "hidden", |x| x.as_bool()).unwrap_or(false),

variables: state.variables.clone(),
Expand Down Expand Up @@ -289,6 +285,7 @@ impl SyntaxDefinition {
};

let pattern = MatchPattern {
has_captures: regex.is_none(),
regex_str: regex_str,
regex: regex,
scope: scope,
Expand Down

0 comments on commit 3f08862

Please sign in to comment.