diff --git a/src/lib.rs b/src/lib.rs index aaa3353..11abd7f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,6 +55,7 @@ #![warn(missing_docs, missing_debug_implementations)] pub mod error; + pub use error::Error; mod map; @@ -78,12 +79,8 @@ where M: VariableMap<'a> + ?Sized, M::Value: AsRef, { - let mut output = Vec::with_capacity(source.len() + source.len() / 10); - substitute_impl(&mut output, source.as_bytes(), 0..source.len(), variables, &|x| { - x.as_ref().as_bytes() - })?; - // SAFETY: Both source and all variable values are valid UTF-8, so substitation result is also valid UTF-8. - unsafe { Ok(String::from_utf8_unchecked(output)) } + let template = Template::parse(source)?; + template.expand(variables) } /// Substitute variables in a byte string. @@ -102,19 +99,133 @@ where M: VariableMap<'a> + ?Sized, M::Value: AsRef<[u8]>, { + let template = parse_template(source, &(0..source.len()))?; let mut output = Vec::with_capacity(source.len() + source.len() / 10); - substitute_impl(&mut output, source, 0..source.len(), variables, &|x| x.as_ref())?; + expand_template_impl(&template, &mut output, variables, &|x| x.as_ref())?; Ok(output) } -/// Substitute variables in a byte string. -/// -/// This is the real implementation used by both [`substitute`] and [`substitute_bytes`]. -/// The function accepts any type that implements [`VariableMap`], and a function to convert the value from the map into bytes. 
-fn substitute_impl<'a, M, F>( +#[derive(Debug, PartialEq, Eq)] +enum TemplatePart<'a> { + Literal(LiteralTemplate<'a>), + Variable(Variable<'a>), + EscapedChar(EscapedCharTemplate), +} + +#[derive(Debug, PartialEq, Eq)] +struct LiteralTemplate<'a> { + text: &'a [u8], +} + +trait ByteLength { + fn size(&self) -> usize; +} + +impl<'a> ByteLength for TemplatePart<'a> { + fn size(&self) -> usize { + match self { + Self::Literal(l) => l.size(), + Self::Variable(v) => v.size(), + Self::EscapedChar(e) => e.size(), + } + } +} + +impl<'a> ByteLength for LiteralTemplate<'a> { + fn size(&self) -> usize { + self.text.len() + } +} + +impl<'a> ByteLength for Variable<'a> { + fn size(&self) -> usize { + self.part_end - self.part_start + } +} + +impl ByteLength for EscapedCharTemplate { + fn size(&self) -> usize { + 2 + } +} + +#[derive(Debug, PartialEq, Eq)] +struct EscapedCharTemplate { + name: u8, +} + +fn parse_template_one_step<'a>( + finger: usize, + source: &'a [u8], + range: &std::ops::Range, +) -> Result>, Error> { + if finger >= range.end { + return Ok(None); // end of input is reached + } + + let c = source.get(finger).unwrap(); + + let part: TemplatePart = match c { + b'$' => TemplatePart::Variable(parse_variable(source, finger)?), + b'\\' => { + let c = unescape_one(source, finger)?; + TemplatePart::EscapedChar(EscapedCharTemplate { name: c }) + }, + _c0 => match memchr::memchr2(b'$', b'\\', &source[finger..range.end]) { + Some(x) => TemplatePart::Literal(LiteralTemplate { + text: &source[finger..finger + x], + }), + None => TemplatePart::Literal(LiteralTemplate { + text: &source[finger..range.end], + }), + }, + }; + + Ok(Some(part)) +} + +/// A template is constructed by providing a template input string. +/// This input string is parsed into `TemplatePart`s which are stored in memory. +/// Calling `expand` instantiates the template by substituting the variables and escape sequences.
+#[derive(Debug)] +pub struct Template<'a> { + source: &'a [u8], + parts: Vec>, +} + +impl<'a> Template<'a> { + /// Creates a new template from a string. + pub fn parse(source: &'a str) -> Result { + Ok(Self { + source: source.as_bytes(), + parts: parse_template(source.as_bytes(), &(0..source.len()))?, + }) + } + + /// Expands all the fields in the template and returns the result. + pub fn expand<'b, M>(&self, variables: &'b M) -> Result + where + M: VariableMap<'b> + ?Sized, + M::Value: AsRef, + { + expand_template(&self.parts, variables, Some(self.source.len())) + } +} + +fn parse_template<'a>(source: &'a [u8], range: &std::ops::Range) -> Result>, Error> { + let mut parts = Vec::new(); + let mut finger = range.start; + while let Some(part) = parse_template_one_step(finger, source, range)? { + finger += part.size(); + parts.push(part); + } + + Ok(parts) +} + +fn expand_template_part_variable<'a, M, F>( + variable: &Variable, output: &mut Vec, - source: &[u8], - range: std::ops::Range, variables: &'a M, to_bytes: &F, ) -> Result<(), Error> @@ -122,46 +233,129 @@ where M: VariableMap<'a> + ?Sized, F: Fn(&M::Value) -> &[u8], { - let mut finger = range.start; - while finger < range.end { - let next = match memchr::memchr2(b'$', b'\\', &source[finger..range.end]) { - Some(x) => finger + x, - None => break, - }; + let value = variables.get(variable.name); + match (&value, &variable.default) { + (None, None) => { + return Err(error::NoSuchVariable { + position: variable.name_start, + name: variable.name.to_owned(), + } + .into()) + }, + (Some(value), _) => { + output.extend_from_slice(to_bytes(value)); + }, + (None, Some(default)) => { + expand_template_impl(default, output, variables, to_bytes)?; + }, + } - output.extend_from_slice(&source[finger..next]); - if source[next] == b'\\' { - output.push(unescape_one(source, next)?); - finger = next + 2; - } else { - let variable = parse_variable(source, next)?; - let value = variables.get(variable.name); - match (&value, 
&variable.default) { - (None, None) => { - return Err(error::NoSuchVariable { - position: variable.name_start, - name: variable.name.to_owned(), - } - .into()) - }, - (Some(value), _) => { - output.extend_from_slice(to_bytes(value)); - }, - (None, Some(default)) => { - substitute_impl(output, source, default.clone(), variables, to_bytes)?; - }, - }; - finger = variable.end_position; - } + Ok(()) +} + +fn expand_template_part_escaped_char(e: &EscapedCharTemplate, output: &mut Vec) -> Result<(), Error> { + output.push(e.name); + Ok(()) +} + +fn expand_template_part_literal(l: &LiteralTemplate, output: &mut Vec) -> Result<(), Error> { + output.extend_from_slice(l.text); + Ok(()) +} + +fn expand_template_part<'a, M, F>( + tp: &TemplatePart, + output: &mut Vec, + variables: &'a M, + to_bytes: &F, +) -> Result<(), Error> +where + M: VariableMap<'a> + ?Sized, + F: Fn(&M::Value) -> &[u8], +{ + match tp { + TemplatePart::Literal(l) => expand_template_part_literal(l, output)?, + TemplatePart::Variable(v) => expand_template_part_variable(v, output, variables, to_bytes)?, + TemplatePart::EscapedChar(e) => expand_template_part_escaped_char(e, output)?, + } + + Ok(()) +} + +fn expand_template_impl<'a, M, F>( + t: &Vec, + output: &mut Vec, + variables: &'a M, + to_bytes: &F, +) -> Result<(), Error> +where + M: VariableMap<'a> + ?Sized, + F: Fn(&M::Value) -> &[u8], +{ + for part in t { + expand_template_part(part, output, variables, to_bytes)?; } - output.extend_from_slice(&source[finger..range.end]); Ok(()) } +/// Takes a template and a variable map and generates the output string. +fn expand_template<'a, M>(t: &Vec, variables: &'a M, source_size: Option) -> Result +where + M: VariableMap<'a> + ?Sized, + M::Value: AsRef, +{ + let output = evaluate_template_to_bytes(t, variables, source_size)?; + // SAFETY: Both source and all variable values are valid UTF-8, so substitution result is also valid UTF-8.
+ unsafe { Ok(String::from_utf8_unchecked(output)) } +} + +fn evaluate_template_to_bytes<'a, M>( + t: &Vec, + variables: &'a M, + source_size: Option, +) -> Result, Error> +where + M: VariableMap<'a> + ?Sized, + M::Value: AsRef, +{ + let source_size = if let Some(source_size) = source_size { + source_size + } else { + 0 + }; + let mut output = Vec::with_capacity(source_size + source_size / 10); + expand_template_impl(t, &mut output, variables, &|x| x.as_ref().as_bytes())?; + Ok(output) +} + +/// Does one sub-step of [`substitute`]: expands only the first template part of `source` and returns the number of source bytes it spans together with its expansion. +pub fn substitute_one<'a, M>(source: &str, variables: &'a M) -> Result<(usize, String), Error> +where + M: VariableMap<'a> + ?Sized, + M::Value: AsRef, +{ + let next_part = parse_template_one_step(0, source.as_bytes(), &(0..source.len()))?; + + if let Some(part) = next_part { + let mut output = Vec::with_capacity(source.len() + source.len() / 10); + expand_template_part(&part, &mut output, variables, &|x| x.as_ref().as_bytes())?; + // SAFETY: Both source and all variable values are valid UTF-8, so substitution result is also valid UTF-8. + Ok((part.size(), unsafe { String::from_utf8_unchecked(output) })) + } else { + Ok((0, "".into())) + } +} + /// A parsed variable. -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] struct Variable<'a> { + /// The start position of the entire variable in the source. + part_start: usize, + + /// The end position of the entire variable in the source. + part_end: usize, + /// The name of the variable. name: &'a str, @@ -169,10 +363,7 @@ struct Variable<'a> { name_start: usize, /// The default value of the variable. - default: Option>, - - /// The end position of the entire variable in the source. - end_position: usize, + default: Option>>, } /// Parse a variable from source at the given position.
@@ -207,7 +398,8 @@ fn parse_variable(source: &[u8], finger: usize) -> Result { name: std::str::from_utf8(&source[finger + 1..name_end]).unwrap(), name_start: finger + 1, default: None, - end_position: name_end, + part_start: finger, + part_end: name_end, }) } } @@ -252,7 +444,8 @@ fn parse_braced_variable(source: &[u8], finger: usize) -> Result Result = BTreeMap::new(); map.insert("name".into(), "world".into()); - check!(let Ok("Hello cruel world!") = substitute("Hello ${not_name:cruel $name}!", &map).as_deref()); + assert_eq!( + Ok("Hello cruel world!"), + substitute("Hello ${not_name:cruel $name}!", &map).as_deref() + ); } #[test] @@ -591,7 +790,7 @@ mod test { let variables: &dyn VariableMap = &variables; let_assert!(Ok(expanded) = substitute("one ${aap}", variables)); - assert!(expanded == "one noot"); + assert_eq!(expanded, "one noot"); } #[test] @@ -607,4 +806,42 @@ mod test { r" ^^^", "\n", )); } + + #[test] + fn test_substitute_one_step_variable_and_escape_sequence() { + let mut variables = BTreeMap::new(); + variables.insert(String::from("NAME"), String::from("subst")); + + let source = r"hello $NAME. Nice\$to meet you $NAME."; + assert_eq!(substitute_one(source, &variables).unwrap(), (6, "hello ".into())); + assert_eq!(substitute_one(&source[6..], &variables).unwrap(), (5, "subst".into())); + assert_eq!( + substitute_one(&source[6 + 5..], &variables).unwrap(), + (6, ". 
Nice".into()) + ); + assert_eq!( + substitute_one(&source[6 + 5 + 6..], &variables).unwrap(), + (2, "$".into()) + ); + assert_eq!( + substitute_one(&source[6 + 5 + 6 + 2..], &variables).unwrap(), + (12, "to meet you ".into()) + ); + assert_eq!( + substitute_one(&source[6 + 5 + 6 + 2 + 12..], &variables).unwrap(), + (5, "subst".into()) + ); + assert_eq!( + substitute_one(&source[6 + 5 + 6 + 2 + 12 + 5..], &variables).unwrap(), + (1, ".".into()) + ); + assert_eq!( + substitute_one(&source[6 + 5 + 6 + 2 + 12 + 5 + 1..], &variables).unwrap(), + (0, "".into()) + ); + assert_eq!( + substitute_one(&source[6 + 5 + 6 + 2 + 12 + 5 + 1..], &variables).unwrap(), + (0, "".into()) + ); + } }