From 3efe2a47ee6d0f8672de6cc0d8795b377340995f Mon Sep 17 00:00:00 2001 From: Ulrich Hornung Date: Sun, 7 Jan 2024 18:22:53 +0100 Subject: [PATCH 1/7] substitute_one_step: stop after first variable has been substituted - try 2 --- src/lib.rs | 167 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 138 insertions(+), 29 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index aaa3353..be54859 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,6 +55,7 @@ #![warn(missing_docs, missing_debug_implementations)] pub mod error; + pub use error::Error; mod map; @@ -63,6 +64,24 @@ pub use map::*; #[cfg(feature = "yaml")] pub mod yaml; +/// When using substitute_one_step, this reports about the +/// type of substitution. +#[derive(Debug, PartialEq, Eq)] +pub enum SubstitutionType { + /// a escape sequence was replace. E.g. "\$" + UnescapeOne, + /// a variable was expanded. E.g. ${VAR} + Variable, +} + +/// result of single step substitution (one at a time) +#[derive(Debug, PartialEq, Eq)] +pub struct SubstituteOneStepResult { + slice_before_ends: usize, + slice_after_starts: usize, + subst_type: SubstitutionType, +} + /// Substitute variables in a string. /// /// Variables have the form `$NAME`, `${NAME}` or `${NAME:default}`. @@ -86,6 +105,34 @@ where unsafe { Ok(String::from_utf8_unchecked(output)) } } +/// Does one sub-step of substitute +/// +/// Returns Some((replacement_string, next_position_in_source_str_after_variable_name)) when succeeded. +/// Returns None if no substitution was possible. +pub fn substitute_one_step<'a, M>( + source: &str, + variables: &'a M, +) -> Result, Error> +where + M: VariableMap<'a> + ?Sized, + M::Value: AsRef, +{ + let result_option = substitute_impl_one_step(0, source.as_bytes(), &(0..source.len()), variables, &|x| { + x.as_ref().as_bytes() + })?; + + if result_option.is_none() { + return Ok(None); + } + + let result = result_option.unwrap(); + + // SAFETY: Both source and all variable values are valid UTF-8, so substitation result is also valid UTF-8. + let output_str = unsafe { String::from_utf8_unchecked(result.0) }; + + Ok(Some((output_str, result.1))) +} + /// Substitute variables in a byte string. /// /// Variables have the form `$NAME`, `${NAME}` or `${NAME:default}`. @@ -107,6 +154,56 @@ where Ok(output) } +fn substitute_impl_one_step<'a, M, F>( + finger: usize, + source: &[u8], + range: &std::ops::Range, + variables: &'a M, + to_bytes: &F, +) -> Result, SubstituteOneStepResult)>, Error> +where + M: VariableMap<'a> + ?Sized, + F: Fn(&M::Value) -> &[u8], +{ + let next = match memchr::memchr2(b'$', b'\\', &source[finger..range.end]) { + Some(x) => finger + x, + None => return Ok(None), + }; + + let mut output = Vec::new(); + if source[next] == b'\\' { + output.push(unescape_one(source, next)?); + Ok(Some((output, SubstituteOneStepResult { + slice_before_ends: next, + slice_after_starts: next + 2, + subst_type: SubstitutionType::UnescapeOne, + }))) + } else { + let variable = parse_variable(source, next)?; + let value = variables.get(variable.name); + match (&value, &variable.default) { + (None, None) => { + return Err(error::NoSuchVariable { + position: variable.name_start, + name: variable.name.to_owned(), + } + .into()) + }, + (Some(value), _) => { + output.extend_from_slice(to_bytes(value)); + }, + (None, Some(default)) => { + substitute_impl(&mut output, source, default.clone(), variables, to_bytes)?; + }, + }; + Ok(Some((output, SubstituteOneStepResult { + slice_before_ends: next, + slice_after_starts: variable.end_position, + subst_type: SubstitutionType::Variable, + }))) + } +} + /// Substitute variables in a byte string. /// /// This is the real implementation used by both [`substitute`] and [`substitute_bytes`]. @@ -124,34 +221,13 @@ where { let mut finger = range.start; while finger < range.end { - let next = match memchr::memchr2(b'$', b'\\', &source[finger..range.end]) { - Some(x) => finger + x, - None => break, - }; - - output.extend_from_slice(&source[finger..next]); - if source[next] == b'\\' { - output.push(unescape_one(source, next)?); - finger = next + 2; + let new_finger_option = substitute_impl_one_step(finger, source, &range, variables, to_bytes)?; + if let Some((mut expanded_value, metadata)) = new_finger_option { + output.extend_from_slice(source.get(range.start..metadata.slice_before_ends).unwrap()); + output.append(&mut expanded_value); + finger = metadata.slice_after_starts; } else { - let variable = parse_variable(source, next)?; - let value = variables.get(variable.name); - match (&value, &variable.default) { - (None, None) => { - return Err(error::NoSuchVariable { - position: variable.name_start, - name: variable.name.to_owned(), - } - .into()) - }, - (Some(value), _) => { - output.extend_from_slice(to_bytes(value)); - }, - (None, Some(default)) => { - substitute_impl(output, source, default.clone(), variables, to_bytes)?; - }, - }; - finger = variable.end_position; + break; } } @@ -418,7 +494,10 @@ mod test { fn substitution_in_default_value() { let mut map: BTreeMap = BTreeMap::new(); map.insert("name".into(), "world".into()); - check!(let Ok("Hello cruel world!") = substitute("Hello ${not_name:cruel $name}!", &map).as_deref()); + assert_eq!( + Ok("Hello cruel world!"), + substitute("Hello ${not_name:cruel $name}!", &map).as_deref() + ); } #[test] @@ -591,7 +670,7 @@ mod test { let variables: &dyn VariableMap = &variables; let_assert!(Ok(expanded) = substitute("one ${aap}", variables)); - assert!(expanded == "one noot"); + assert_eq!(expanded, "one noot"); } #[test] @@ -607,4 +686,34 @@ mod test { r" ^^^", "\n", )); } + + #[test] + fn test_substitute_one_step_variable_and_escape_sequence() { + let mut variables = BTreeMap::new(); + variables.insert(String::from("NAME"), String::from("subst")); + + let source = r"hello $NAME. Nice\$to meet you $NAME."; + let result = substitute_one_step(source, &variables).unwrap().unwrap(); + assert_eq!(result.0, "subst"); + assert_eq!(result.1.slice_before_ends, 6); + assert_eq!(result.1.slice_after_starts, 11); + assert_eq!(result.1.subst_type, SubstitutionType::Variable); + + let result = substitute_one_step(source.get(result.1.slice_after_starts..).unwrap(), &variables) + .unwrap() + .unwrap(); + assert_eq!(result.0, "$"); + assert_eq!(result.1.slice_before_ends, 6); + assert_eq!(result.1.slice_after_starts, 8); + assert_eq!(result.1.subst_type, SubstitutionType::UnescapeOne); + } + + #[test] + fn test_substitute_one_step_no_substitution() { + let variables: BTreeMap = BTreeMap::new(); + + let source = r"hello world"; + let result = substitute_one_step(source, &variables).unwrap(); + assert!(result.is_none()); + } } From 38cb1cba4c878bf5699ecfd0922de8ed554f8248 Mon Sep 17 00:00:00 2001 From: Ulrich Hornung Date: Sun, 7 Jan 2024 22:54:01 +0100 Subject: [PATCH 2/7] parse template based implementation --- src/lib.rs | 229 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 184 insertions(+), 45 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index be54859..ecd9b58 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -154,32 +154,91 @@ where Ok(output) } -fn substitute_impl_one_step<'a, M, F>( +#[derive(Debug)] +enum TemplatePart<'a> { + Literal(LiteralTemplate<'a>), + Variable(Variable<'a>), + EscapedChar(EscapedCharTemplate), +} + +#[derive(Debug)] +struct LiteralTemplate<'a> { + text: &'a [u8], +} + +trait ByteLength { + fn size(&self) -> usize; +} + +impl<'a> ByteLength for TemplatePart<'a> { + fn size(&self) -> usize { + match self { + Self::Literal(l) => l.text.len(), + Self::Variable(v) => v.part_end - v.part_start, + Self::EscapedChar(_e) => 2, + } + } +} + +#[derive(Debug)] +struct EscapedCharTemplate { + name: u8, +} + +fn parse_template_one_step<'a>( finger: usize, - source: &[u8], + source: &'a [u8], + range: &std::ops::Range, +) -> Result>, Error> +{ + if finger >= range.end { + return Ok(None); // end of input is reached + } + + let c = source.get(finger).unwrap(); + + let part: TemplatePart = match c { + b'$' => TemplatePart::Variable(parse_variable(source, finger)?), + b'\\' => { + let c = unescape_one(source, finger)?; + TemplatePart::EscapedChar(EscapedCharTemplate { name: c }) + } + _c0 => { + match memchr::memchr2(b'$', b'\\', &source[finger..range.end]) { + Some(x) => TemplatePart::Literal(LiteralTemplate { text: &source[finger..finger + x] }), + None => TemplatePart::Literal(LiteralTemplate{ text: &source[finger..range.end] }) + } + } + }; + + Ok(Some(part)) +} + +fn parse_template<'a>( + source: &'a [u8], range: &std::ops::Range, +) -> Result>, Error> +{ + let mut parts = Vec::new(); + let mut finger = range.start; + while let Some(part) = parse_template_one_step(finger, source, range)? { + finger = finger + part.size(); + parts.push(part); + } + + Ok(parts) +} + +fn evaluate_template_part_variable<'a, M, F>( + variable: &Variable, + output: &mut Vec, variables: &'a M, to_bytes: &F, -) -> Result, SubstituteOneStepResult)>, Error> +) -> Result<(), Error> where M: VariableMap<'a> + ?Sized, F: Fn(&M::Value) -> &[u8], { - let next = match memchr::memchr2(b'$', b'\\', &source[finger..range.end]) { - Some(x) => finger + x, - None => return Ok(None), - }; - - let mut output = Vec::new(); - if source[next] == b'\\' { - output.push(unescape_one(source, next)?); - Ok(Some((output, SubstituteOneStepResult { - slice_before_ends: next, - slice_after_starts: next + 2, - subst_type: SubstitutionType::UnescapeOne, - }))) - } else { - let variable = parse_variable(source, next)?; let value = variables.get(variable.name); match (&value, &variable.default) { (None, None) => { @@ -193,14 +252,98 @@ where output.extend_from_slice(to_bytes(value)); }, (None, Some(default)) => { - substitute_impl(&mut output, source, default.clone(), variables, to_bytes)?; + evaluate_template(default, output, variables, to_bytes)?; }, - }; + } + + Ok(()) +} + +fn evaluate_template_part_escaped_char<'a>( + e: &EscapedCharTemplate, + output: &mut Vec, +) -> Result<(), Error> +{ + output.push(e.name); + Ok(()) +} + +fn evaluate_template_part_literal<'a>( + l: &LiteralTemplate, + output: &mut Vec, +) -> Result<(), Error> +{ + output.extend_from_slice(l.text); + Ok(()) +} + +fn evaluate_template_part<'a, M, F>( + tp: &TemplatePart, + output: &mut Vec, + variables: &'a M, + to_bytes: &F, +) -> Result<(), Error> +where + M: VariableMap<'a> + ?Sized, + F: Fn(&M::Value) -> &[u8], +{ + match tp { + TemplatePart::Literal(l) => evaluate_template_part_literal(l, output)?, + TemplatePart::Variable(v) => evaluate_template_part_variable(v, output, variables, to_bytes)?, + TemplatePart::EscapedChar(e) => evaluate_template_part_escaped_char(e, output)?, + } + + Ok(()) +} + +fn evaluate_template<'a, M, F>( + t: &Vec, + output: &mut Vec, + variables: &'a M, + to_bytes: &F, +) -> Result<(), Error> +where + M: VariableMap<'a> + ?Sized, + F: Fn(&M::Value) -> &[u8], +{ + for part in t { + evaluate_template_part(part, output, variables, to_bytes)?; + } + + Ok(()) +} + +fn substitute_impl_one_step<'a, M, F>( + finger: usize, + source: &[u8], + range: &std::ops::Range, + variables: &'a M, + to_bytes: &F, +) -> Result, SubstituteOneStepResult)>, Error> +where + M: VariableMap<'a> + ?Sized, + F: Fn(&M::Value) -> &[u8], +{ + let mut finger = finger; + let mut output = Vec::::new(); + let mut part = parse_template_one_step(finger, source, range)?; + match &part { + None => return Ok(None), + Some(TemplatePart::Literal(_l)) => { + finger += part.unwrap().size(); + part = parse_template_one_step(finger, source, range)?; + } + Some(_) => {} + } + if let Some(part) = part { + evaluate_template_part(&part, &mut output, variables, to_bytes)?; Ok(Some((output, SubstituteOneStepResult { - slice_before_ends: next, - slice_after_starts: variable.end_position, - subst_type: SubstitutionType::Variable, + slice_before_ends: finger, + slice_after_starts: finger + part.size(), + subst_type: if let TemplatePart::EscapedChar(_) = part { SubstitutionType::UnescapeOne } else { SubstitutionType::Variable }, }))) + } else { + Ok(None) } } @@ -219,25 +362,19 @@ where M: VariableMap<'a> + ?Sized, F: Fn(&M::Value) -> &[u8], { - let mut finger = range.start; - while finger < range.end { - let new_finger_option = substitute_impl_one_step(finger, source, &range, variables, to_bytes)?; - if let Some((mut expanded_value, metadata)) = new_finger_option { - output.extend_from_slice(source.get(range.start..metadata.slice_before_ends).unwrap()); - output.append(&mut expanded_value); - finger = metadata.slice_after_starts; - } else { - break; - } + let parts = parse_template(source, &range)?; + evaluate_template(&parts, output, variables, to_bytes) } - output.extend_from_slice(&source[finger..range.end]); - Ok(()) -} - /// A parsed variable. #[derive(Debug)] struct Variable<'a> { + /// template part start + part_start: usize, + + /// The end position of the entire variable in the source. + part_end: usize, + /// The name of the variable. name: &'a str, @@ -245,10 +382,7 @@ struct Variable<'a> { name_start: usize, /// The default value of the variable. - default: Option>, - - /// The end position of the entire variable in the source. - end_position: usize, + default: Option>>, } /// Parse a variable from source at the given position. @@ -283,7 +417,8 @@ fn parse_variable(source: &[u8], finger: usize) -> Result { name: std::str::from_utf8(&source[finger + 1..name_end]).unwrap(), name_start: finger + 1, default: None, - end_position: name_end, + part_start: finger, + part_end: name_end, }) } } @@ -328,7 +463,8 @@ fn parse_braced_variable(source: &[u8], finger: usize) -> Result Result Date: Sun, 7 Jan 2024 23:09:03 +0100 Subject: [PATCH 3/7] partially adapted tests --- src/lib.rs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ecd9b58..bf28905 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -154,14 +154,14 @@ where Ok(output) } -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] enum TemplatePart<'a> { Literal(LiteralTemplate<'a>), Variable(Variable<'a>), EscapedChar(EscapedCharTemplate), } -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] struct LiteralTemplate<'a> { text: &'a [u8], } @@ -180,7 +180,7 @@ impl<'a> ByteLength for TemplatePart<'a> { } } -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] struct EscapedCharTemplate { name: u8, } @@ -367,7 +367,7 @@ where } /// A parsed variable. -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] struct Variable<'a> { /// template part start part_start: usize, @@ -832,6 +832,15 @@ mod test { variables.insert(String::from("NAME"), String::from("subst")); let source = r"hello $NAME. Nice\$to meet you $NAME."; + let range = 0..source.len(); + let mut finger = 0; + let result = parse_template_one_step(0, source.as_bytes(), &range).unwrap().unwrap(); + assert_eq!(result, TemplatePart::Literal(LiteralTemplate { text: &source.as_bytes()[0..6] })); + finger += result.size(); + let result = parse_template_one_step(finger, source.as_bytes(), &range).unwrap().unwrap(); + assert_eq!(result, TemplatePart::Variable(Variable { part_start: finger, name_start: finger+1, name: "NAME", part_end: finger+5, default: None })); + evaluate_template_part(&result, output, variables, to_bytes); + let result = substitute_one_step(source, &variables).unwrap().unwrap(); assert_eq!(result.0, "subst"); assert_eq!(result.1.slice_before_ends, 6); From 769bbb6b2deb335f0eb47b891bf652aa76f27e40 Mon Sep 17 00:00:00 2001 From: Ulrich Hornung Date: Mon, 8 Jan 2024 00:44:04 +0100 Subject: [PATCH 4/7] working template based --- src/lib.rs | 228 ++++++++++++++++++++++------------------------------- 1 file changed, 96 insertions(+), 132 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index bf28905..7ddf37d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,24 +64,6 @@ pub use map::*; #[cfg(feature = "yaml")] pub mod yaml; -/// When using substitute_one_step, this reports about the -/// type of substitution. -#[derive(Debug, PartialEq, Eq)] -pub enum SubstitutionType { - /// a escape sequence was replace. E.g. "\$" - UnescapeOne, - /// a variable was expanded. E.g. ${VAR} - Variable, -} - -/// result of single step substitution (one at a time) -#[derive(Debug, PartialEq, Eq)] -pub struct SubstituteOneStepResult { - slice_before_ends: usize, - slice_after_starts: usize, - subst_type: SubstitutionType, -} - /// Substitute variables in a string. /// /// Variables have the form `$NAME`, `${NAME}` or `${NAME:default}`. @@ -97,40 +79,8 @@ where M: VariableMap<'a> + ?Sized, M::Value: AsRef, { - let mut output = Vec::with_capacity(source.len() + source.len() / 10); - substitute_impl(&mut output, source.as_bytes(), 0..source.len(), variables, &|x| { - x.as_ref().as_bytes() - })?; - // SAFETY: Both source and all variable values are valid UTF-8, so substitation result is also valid UTF-8. - unsafe { Ok(String::from_utf8_unchecked(output)) } -} - -/// Does one sub-step of substitute -/// -/// Returns Some((replacement_string, next_position_in_source_str_after_variable_name)) when succeeded. -/// Returns None if no substitution was possible. -pub fn substitute_one_step<'a, M>( - source: &str, - variables: &'a M, -) -> Result, Error> -where - M: VariableMap<'a> + ?Sized, - M::Value: AsRef, -{ - let result_option = substitute_impl_one_step(0, source.as_bytes(), &(0..source.len()), variables, &|x| { - x.as_ref().as_bytes() - })?; - - if result_option.is_none() { - return Ok(None); - } - - let result = result_option.unwrap(); - - // SAFETY: Both source and all variable values are valid UTF-8, so substitation result is also valid UTF-8. - let output_str = unsafe { String::from_utf8_unchecked(result.0) }; - - Ok(Some((output_str, result.1))) + let template = Template::parse(source)?; + template.expand(variables) } /// Substitute variables in a byte string. @@ -149,8 +99,9 @@ where M: VariableMap<'a> + ?Sized, M::Value: AsRef<[u8]>, { + let template = parse_template(source, &(0..source.len()))?; let mut output = Vec::with_capacity(source.len() + source.len() / 10); - substitute_impl(&mut output, source, 0..source.len(), variables, &|x| x.as_ref())?; + evaluate_template_impl(&template, &mut output, variables, &|x| x.as_ref())?; Ok(output) } @@ -173,13 +124,26 @@ trait ByteLength { impl<'a> ByteLength for TemplatePart<'a> { fn size(&self) -> usize { match self { - Self::Literal(l) => l.text.len(), - Self::Variable(v) => v.part_end - v.part_start, - Self::EscapedChar(_e) => 2, + Self::Literal(l) => l.size(), + Self::Variable(v) => v.size(), + Self::EscapedChar(e) => e.size(), } } } +impl<'a> ByteLength for LiteralTemplate<'a> { + fn size(&self) -> usize { self.text.len() } +} + +impl<'a> ByteLength for Variable<'a> { + fn size(&self) -> usize { self.part_end - self.part_start } +} + +impl ByteLength for EscapedCharTemplate { + fn size(&self) -> usize { 2 } +} + + #[derive(Debug, PartialEq, Eq)] struct EscapedCharTemplate { name: u8, @@ -214,6 +178,36 @@ fn parse_template_one_step<'a>( Ok(Some(part)) } +/// This class can be constructed by providing a template input string. +/// This input string is parsed into TemplateParts which are stored in memory. +/// With calling `expand` the template gets instantiated by substitution of the variables and escape sequences. +#[derive(Debug)] +pub struct Template<'a> { + source: &'a [u8], + parts: Vec>, +} + +impl<'a> Template<'a> { + /// Creates a new template from a string + pub fn parse(source: &'a str) -> Result { + Ok(Self { + source: source.as_bytes(), + parts: parse_template( + source.as_bytes(), + &(0..source.len()))?, + }) + } + + /// expands all the fields in the template and returns result + pub fn expand<'b, M>(&self, variables: &'b M) -> Result + where + M: VariableMap<'b> + ?Sized, + M::Value: AsRef, + { + expand_template_simple(&self.parts, variables, Some(self.source.len())) + } +} + fn parse_template<'a>( source: &'a [u8], range: &std::ops::Range, @@ -252,7 +246,7 @@ where output.extend_from_slice(to_bytes(value)); }, (None, Some(default)) => { - evaluate_template(default, output, variables, to_bytes)?; + evaluate_template_impl(default, output, variables, to_bytes)?; }, } @@ -296,7 +290,7 @@ where Ok(()) } -fn evaluate_template<'a, M, F>( +fn evaluate_template_impl<'a, M, F>( t: &Vec, output: &mut Vec, variables: &'a M, @@ -313,58 +307,50 @@ where Ok(()) } -fn substitute_impl_one_step<'a, M, F>( - finger: usize, - source: &[u8], - range: &std::ops::Range, - variables: &'a M, - to_bytes: &F, -) -> Result, SubstituteOneStepResult)>, Error> +/// takes a template and variable map to generate output +fn expand_template_simple<'a, M>(t: &Vec, variables: &'a M, source_size: Option) -> Result where M: VariableMap<'a> + ?Sized, - F: Fn(&M::Value) -> &[u8], + M::Value: AsRef, { - let mut finger = finger; - let mut output = Vec::::new(); - let mut part = parse_template_one_step(finger, source, range)?; - match &part { - None => return Ok(None), - Some(TemplatePart::Literal(_l)) => { - finger += part.unwrap().size(); - part = parse_template_one_step(finger, source, range)?; - } - Some(_) => {} - } - if let Some(part) = part { - evaluate_template_part(&part, &mut output, variables, to_bytes)?; - Ok(Some((output, SubstituteOneStepResult { - slice_before_ends: finger, - slice_after_starts: finger + part.size(), - subst_type: if let TemplatePart::EscapedChar(_) = part { SubstitutionType::UnescapeOne } else { SubstitutionType::Variable }, - }))) - } else { - Ok(None) - } + let output = evaluate_template_simple_impl(t, variables, source_size)?; + // SAFETY: Both source and all variable values are valid UTF-8, so substitation result is also valid UTF-8. + unsafe { Ok(String::from_utf8_unchecked(output)) } } -/// Substitute variables in a byte string. -/// -/// This is the real implementation used by both [`substitute`] and [`substitute_bytes`]. -/// The function accepts any type that implements [`VariableMap`], and a function to convert the value from the map into bytes. -fn substitute_impl<'a, M, F>( - output: &mut Vec, - source: &[u8], - range: std::ops::Range, - variables: &'a M, - to_bytes: &F, -) -> Result<(), Error> +fn evaluate_template_simple_impl<'a, M>(t: &Vec, variables: &'a M, source_size: Option) -> Result, Error> where M: VariableMap<'a> + ?Sized, - F: Fn(&M::Value) -> &[u8], + M::Value: AsRef, { - let parts = parse_template(source, &range)?; - evaluate_template(&parts, output, variables, to_bytes) + let source_size = if let Some(source_size) = source_size { source_size } else {0}; + let mut output = Vec::with_capacity(source_size + source_size / 10); + evaluate_template_impl(t, &mut output, variables, &|x| { + x.as_ref().as_bytes() + })?; + Ok(output) +} + +/// does one sub-step of substitute. +pub fn substitute_one<'a, M>(source: &str, variables: &'a M) -> Result<(usize, String), Error> +where + M: VariableMap<'a> + ?Sized, + M::Value: AsRef, +{ + let next_part = parse_template_one_step( + 0, source.as_bytes(), &(0..source.len()))?; + + if let Some(part) = next_part { + let mut output = Vec::with_capacity(source.len() + source.len() / 10); + evaluate_template_part(&part, &mut output, variables, &|x| { + x.as_ref().as_bytes() + })?; + // SAFETY: Both source and all variable values are valid UTF-8, so substitation result is also valid UTF-8. + Ok((part.size(), unsafe { String::from_utf8_unchecked(output) })) + } else { + Ok((0, "".into())) } +} /// A parsed variable. #[derive(Debug, PartialEq, Eq)] @@ -832,36 +818,14 @@ mod test { variables.insert(String::from("NAME"), String::from("subst")); let source = r"hello $NAME. Nice\$to meet you $NAME."; - let range = 0..source.len(); - let mut finger = 0; - let result = parse_template_one_step(0, source.as_bytes(), &range).unwrap().unwrap(); - assert_eq!(result, TemplatePart::Literal(LiteralTemplate { text: &source.as_bytes()[0..6] })); - finger += result.size(); - let result = parse_template_one_step(finger, source.as_bytes(), &range).unwrap().unwrap(); - assert_eq!(result, TemplatePart::Variable(Variable { part_start: finger, name_start: finger+1, name: "NAME", part_end: finger+5, default: None })); - evaluate_template_part(&result, output, variables, to_bytes); - - let result = substitute_one_step(source, &variables).unwrap().unwrap(); - assert_eq!(result.0, "subst"); - assert_eq!(result.1.slice_before_ends, 6); - assert_eq!(result.1.slice_after_starts, 11); - assert_eq!(result.1.subst_type, SubstitutionType::Variable); - - let result = substitute_one_step(source.get(result.1.slice_after_starts..).unwrap(), &variables) - .unwrap() - .unwrap(); - assert_eq!(result.0, "$"); - assert_eq!(result.1.slice_before_ends, 6); - assert_eq!(result.1.slice_after_starts, 8); - assert_eq!(result.1.subst_type, SubstitutionType::UnescapeOne); - } - - #[test] - fn test_substitute_one_step_no_substitution() { - let variables: BTreeMap = BTreeMap::new(); - - let source = r"hello world"; - let result = substitute_one_step(source, &variables).unwrap(); - assert!(result.is_none()); + assert_eq!(substitute_one(source, &variables).unwrap(), (6, "hello ".into())); + assert_eq!(substitute_one(&source[6..], &variables).unwrap(), (5, "subst".into())); + assert_eq!(substitute_one(&source[6+5..], &variables).unwrap(), (6, ". Nice".into())); + assert_eq!(substitute_one(&source[6+5+6..], &variables).unwrap(), (2, "$".into())); + assert_eq!(substitute_one(&source[6+5+6+2..], &variables).unwrap(), (12, "to meet you ".into())); + assert_eq!(substitute_one(&source[6+5+6+2+12..], &variables).unwrap(), (5, "subst".into())); + assert_eq!(substitute_one(&source[6+5+6+2+12+5..], &variables).unwrap(), (1, ".".into())); + assert_eq!(substitute_one(&source[6+5+6+2+12+5+1..], &variables).unwrap(), (0, "".into())); + assert_eq!(substitute_one(&source[6+5+6+2+12+5+1..], &variables).unwrap(), (0, "".into())); } } From 87313a61d91e3c3896dcbe35b6ee562e120f86f3 Mon Sep 17 00:00:00 2001 From: Ulrich Hornung Date: Mon, 8 Jan 2024 00:50:03 +0100 Subject: [PATCH 5/7] renamings --- src/lib.rs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7ddf37d..202d72f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -101,7 +101,7 @@ where { let template = parse_template(source, &(0..source.len()))?; let mut output = Vec::with_capacity(source.len() + source.len() / 10); - evaluate_template_impl(&template, &mut output, variables, &|x| x.as_ref())?; + expand_template_impl(&template, &mut output, variables, &|x| x.as_ref())?; Ok(output) } @@ -204,7 +204,7 @@ impl<'a> Template<'a> { M: VariableMap<'b> + ?Sized, M::Value: AsRef, { - expand_template_simple(&self.parts, variables, Some(self.source.len())) + expand_template(&self.parts, variables, Some(self.source.len())) } } @@ -223,7 +223,7 @@ fn parse_template<'a>( Ok(parts) } -fn evaluate_template_part_variable<'a, M, F>( +fn expand_template_part_variable<'a, M, F>( variable: &Variable, output: &mut Vec, variables: &'a M, @@ -246,14 +246,14 @@ where output.extend_from_slice(to_bytes(value)); }, (None, Some(default)) => { - evaluate_template_impl(default, output, variables, to_bytes)?; + expand_template_impl(default, output, variables, to_bytes)?; }, } Ok(()) } -fn evaluate_template_part_escaped_char<'a>( +fn expand_template_part_escaped_char<'a>( e: &EscapedCharTemplate, output: &mut Vec, ) -> Result<(), Error> @@ -262,7 +262,7 @@ fn evaluate_template_part_escaped_char<'a>( Ok(()) } -fn evaluate_template_part_literal<'a>( +fn expand_template_part_literal<'a>( l: &LiteralTemplate, output: &mut Vec, ) -> Result<(), Error> @@ -271,7 +271,7 @@ fn evaluate_template_part_literal<'a>( Ok(()) } -fn evaluate_template_part<'a, M, F>( +fn expand_template_part<'a, M, F>( tp: &TemplatePart, output: &mut Vec, variables: &'a M, @@ -282,15 +282,15 @@ where F: Fn(&M::Value) -> &[u8], { match tp { - TemplatePart::Literal(l) => evaluate_template_part_literal(l, output)?, - TemplatePart::Variable(v) => evaluate_template_part_variable(v, output, variables, to_bytes)?, - TemplatePart::EscapedChar(e) => evaluate_template_part_escaped_char(e, output)?, + TemplatePart::Literal(l) => expand_template_part_literal(l, output)?, + TemplatePart::Variable(v) => expand_template_part_variable(v, output, variables, to_bytes)?, + TemplatePart::EscapedChar(e) => expand_template_part_escaped_char(e, output)?, } Ok(()) } -fn evaluate_template_impl<'a, M, F>( +fn expand_template_impl<'a, M, F>( t: &Vec, output: &mut Vec, variables: &'a M, @@ -301,31 +301,31 @@ where F: Fn(&M::Value) -> &[u8], { for part in t { - evaluate_template_part(part, output, variables, to_bytes)?; + expand_template_part(part, output, variables, to_bytes)?; } Ok(()) } /// takes a template and variable map to generate output -fn expand_template_simple<'a, M>(t: &Vec, variables: &'a M, source_size: Option) -> Result +fn expand_template<'a, M>(t: &Vec, variables: &'a M, source_size: Option) -> Result where M: VariableMap<'a> + ?Sized, M::Value: AsRef, { - let output = evaluate_template_simple_impl(t, variables, source_size)?; + let output = evaluate_template_to_bytes(t, variables, source_size)?; // SAFETY: Both source and all variable values are valid UTF-8, so substitation result is also valid UTF-8. unsafe { Ok(String::from_utf8_unchecked(output)) } } -fn evaluate_template_simple_impl<'a, M>(t: &Vec, variables: &'a M, source_size: Option) -> Result, Error> +fn evaluate_template_to_bytes<'a, M>(t: &Vec, variables: &'a M, source_size: Option) -> Result, Error> where M: VariableMap<'a> + ?Sized, M::Value: AsRef, { let source_size = if let Some(source_size) = source_size { source_size } else {0}; let mut output = Vec::with_capacity(source_size + source_size / 10); - evaluate_template_impl(t, &mut output, variables, &|x| { + expand_template_impl(t, &mut output, variables, &|x| { x.as_ref().as_bytes() })?; Ok(output) @@ -342,7 +342,7 @@ where if let Some(part) = next_part { let mut output = Vec::with_capacity(source.len() + source.len() / 10); - evaluate_template_part(&part, &mut output, variables, &|x| { + expand_template_part(&part, &mut output, variables, &|x| { x.as_ref().as_bytes() })?; // SAFETY: Both source and all variable values are valid UTF-8, so substitation result is also valid UTF-8. From a0513d9eb4fe41dc7ce77b1ba80a3c294bc3a56a Mon Sep 17 00:00:00 2001 From: Ulrich Hornung Date: Mon, 8 Jan 2024 00:53:05 +0100 Subject: [PATCH 6/7] cargo fmt --- src/lib.rs | 154 +++++++++++++++++++++++++++++------------------------ 1 file changed, 85 insertions(+), 69 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 202d72f..0cbebd0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -107,14 +107,14 @@ where #[derive(Debug, PartialEq, Eq)] enum TemplatePart<'a> { - Literal(LiteralTemplate<'a>), - Variable(Variable<'a>), - EscapedChar(EscapedCharTemplate), + Literal(LiteralTemplate<'a>), + Variable(Variable<'a>), + EscapedChar(EscapedCharTemplate), } #[derive(Debug, PartialEq, Eq)] struct LiteralTemplate<'a> { - text: &'a [u8], + text: &'a [u8], } trait ByteLength { @@ -122,28 +122,33 @@ trait ByteLength { } impl<'a> ByteLength for TemplatePart<'a> { - fn size(&self) -> usize { + fn size(&self) -> usize { match self { Self::Literal(l) => l.size(), Self::Variable(v) => v.size(), Self::EscapedChar(e) => e.size(), } - } + } } impl<'a> ByteLength for LiteralTemplate<'a> { - fn size(&self) -> usize { self.text.len() } + fn size(&self) -> usize { + self.text.len() + } } impl<'a> ByteLength for Variable<'a> { - fn size(&self) -> usize { self.part_end - self.part_start } + fn size(&self) -> usize { + self.part_end - self.part_start + } } impl ByteLength for EscapedCharTemplate { - fn size(&self) -> usize { 2 } + fn size(&self) -> usize { + 2 + } } - #[derive(Debug, PartialEq, Eq)] struct EscapedCharTemplate { name: u8, @@ -153,8 +158,7 @@ fn parse_template_one_step<'a>( finger: usize, source: &'a [u8], range: &std::ops::Range, -) -> Result>, Error> -{ +) -> Result>, Error> { if finger >= range.end { return Ok(None); // end of input is reached } @@ -164,15 +168,17 @@ fn parse_template_one_step<'a>( let part: TemplatePart = match c { b'$' => TemplatePart::Variable(parse_variable(source, finger)?), b'\\' => { - let c = unescape_one(source, finger)?; + let c = unescape_one(source, finger)?; TemplatePart::EscapedChar(EscapedCharTemplate { name: c }) - } - _c0 => { - match memchr::memchr2(b'$', b'\\', &source[finger..range.end]) { - Some(x) => TemplatePart::Literal(LiteralTemplate { text: &source[finger..finger + x] }), - None => TemplatePart::Literal(LiteralTemplate{ text: &source[finger..range.end] }) - } - } + }, + _c0 => match memchr::memchr2(b'$', b'\\', &source[finger..range.end]) { + Some(x) => TemplatePart::Literal(LiteralTemplate { + text: &source[finger..finger + x], + }), + None => TemplatePart::Literal(LiteralTemplate { + text: &source[finger..range.end], + }), + }, }; Ok(Some(part)) @@ -192,9 +198,7 @@ impl<'a> Template<'a> { pub fn parse(source: &'a str) -> Result { Ok(Self { source: source.as_bytes(), - parts: parse_template( - source.as_bytes(), - &(0..source.len()))?, + parts: parse_template(source.as_bytes(), &(0..source.len()))?, }) } @@ -208,11 +212,7 @@ impl<'a> Template<'a> { } } -fn parse_template<'a>( - source: &'a [u8], - range: &std::ops::Range, -) -> Result>, Error> -{ +fn parse_template<'a>(source: &'a [u8], range: &std::ops::Range) -> Result>, Error> { let mut parts = Vec::new(); let mut finger = range.start; while let Some(part) = parse_template_one_step(finger, source, range)? { @@ -233,40 +233,32 @@ where M: VariableMap<'a> + ?Sized, F: Fn(&M::Value) -> &[u8], { - let value = variables.get(variable.name); - match (&value, &variable.default) { - (None, None) => { - return Err(error::NoSuchVariable { - position: variable.name_start, - name: variable.name.to_owned(), - } - .into()) - }, - (Some(value), _) => { - output.extend_from_slice(to_bytes(value)); - }, - (None, Some(default)) => { + let value = variables.get(variable.name); + match (&value, &variable.default) { + (None, None) => { + return Err(error::NoSuchVariable { + position: variable.name_start, + name: variable.name.to_owned(), + } + .into()) + }, + (Some(value), _) => { + output.extend_from_slice(to_bytes(value)); + }, + (None, Some(default)) => { expand_template_impl(default, output, variables, to_bytes)?; - }, + }, } Ok(()) } -fn expand_template_part_escaped_char<'a>( - e: &EscapedCharTemplate, - output: &mut Vec, -) -> Result<(), Error> -{ +fn expand_template_part_escaped_char<'a>(e: &EscapedCharTemplate, output: &mut Vec) -> Result<(), Error> { output.push(e.name); Ok(()) } -fn expand_template_part_literal<'a>( - l: &LiteralTemplate, - output: &mut Vec, -) -> Result<(), Error> -{ +fn expand_template_part_literal<'a>(l: &LiteralTemplate, output: &mut Vec) -> Result<(), Error> { output.extend_from_slice(l.text); Ok(()) } @@ -318,16 +310,22 @@ where unsafe { Ok(String::from_utf8_unchecked(output)) } } -fn evaluate_template_to_bytes<'a, M>(t: &Vec, variables: &'a M, source_size: Option) -> Result, Error> +fn evaluate_template_to_bytes<'a, M>( + t: &Vec, + variables: &'a M, + source_size: Option, +) -> Result, Error> where M: VariableMap<'a> + ?Sized, M::Value: AsRef, { - let source_size = if let Some(source_size) = source_size { source_size } else {0}; + let source_size = if let Some(source_size) = source_size { + source_size + } else { + 0 + }; let mut output = Vec::with_capacity(source_size + source_size / 10); - expand_template_impl(t, &mut output, variables, &|x| { - x.as_ref().as_bytes() - })?; + expand_template_impl(t, &mut output, variables, &|x| x.as_ref().as_bytes())?; Ok(output) } @@ -337,14 +335,11 @@ where M: VariableMap<'a> + ?Sized, M::Value: AsRef, { - let next_part = parse_template_one_step( - 0, source.as_bytes(), &(0..source.len()))?; + let next_part = parse_template_one_step(0, source.as_bytes(), &(0..source.len()))?; if let Some(part) = next_part { let mut output = Vec::with_capacity(source.len() + source.len() / 10); - expand_template_part(&part, &mut output, variables, &|x| { - x.as_ref().as_bytes() - })?; + expand_template_part(&part, &mut output, variables, &|x| x.as_ref().as_bytes())?; // SAFETY: Both source and all variable values are valid UTF-8, so substitation result is also valid UTF-8. Ok((part.size(), unsafe { String::from_utf8_unchecked(output) })) } else { @@ -820,12 +815,33 @@ mod test { let source = r"hello $NAME. Nice\$to meet you $NAME."; assert_eq!(substitute_one(source, &variables).unwrap(), (6, "hello ".into())); assert_eq!(substitute_one(&source[6..], &variables).unwrap(), (5, "subst".into())); - assert_eq!(substitute_one(&source[6+5..], &variables).unwrap(), (6, ". Nice".into())); - assert_eq!(substitute_one(&source[6+5+6..], &variables).unwrap(), (2, "$".into())); - assert_eq!(substitute_one(&source[6+5+6+2..], &variables).unwrap(), (12, "to meet you ".into())); - assert_eq!(substitute_one(&source[6+5+6+2+12..], &variables).unwrap(), (5, "subst".into())); - assert_eq!(substitute_one(&source[6+5+6+2+12+5..], &variables).unwrap(), (1, ".".into())); - assert_eq!(substitute_one(&source[6+5+6+2+12+5+1..], &variables).unwrap(), (0, "".into())); - assert_eq!(substitute_one(&source[6+5+6+2+12+5+1..], &variables).unwrap(), (0, "".into())); + assert_eq!( + substitute_one(&source[6 + 5..], &variables).unwrap(), + (6, ". Nice".into()) + ); + assert_eq!( + substitute_one(&source[6 + 5 + 6..], &variables).unwrap(), + (2, "$".into()) + ); + assert_eq!( + substitute_one(&source[6 + 5 + 6 + 2..], &variables).unwrap(), + (12, "to meet you ".into()) + ); + assert_eq!( + substitute_one(&source[6 + 5 + 6 + 2 + 12..], &variables).unwrap(), + (5, "subst".into()) + ); + assert_eq!( + substitute_one(&source[6 + 5 + 6 + 2 + 12 + 5..], &variables).unwrap(), + (1, ".".into()) + ); + assert_eq!( + substitute_one(&source[6 + 5 + 6 + 2 + 12 + 5 + 1..], &variables).unwrap(), + (0, "".into()) + ); + assert_eq!( + substitute_one(&source[6 + 5 + 6 + 2 + 12 + 5 + 1..], &variables).unwrap(), + (0, "".into()) + ); } } From 71a90a100253e255dbcd955e22c196a1a95d15a4 Mon Sep 17 00:00:00 2001 From: Ulrich Hornung Date: Mon, 8 Jan 2024 00:54:27 +0100 Subject: [PATCH 7/7] clippy --- src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0cbebd0..11abd7f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -216,7 +216,7 @@ fn parse_template<'a>(source: &'a [u8], range: &std::ops::Range) -> Resul let mut parts = Vec::new(); let mut finger = range.start; while let Some(part) = parse_template_one_step(finger, source, range)? { - finger = finger + part.size(); + finger += part.size(); parts.push(part); } @@ -253,12 +253,12 @@ where Ok(()) } -fn expand_template_part_escaped_char<'a>(e: &EscapedCharTemplate, output: &mut Vec) -> Result<(), Error> { +fn expand_template_part_escaped_char(e: &EscapedCharTemplate, output: &mut Vec) -> Result<(), Error> { output.push(e.name); Ok(()) } -fn expand_template_part_literal<'a>(l: &LiteralTemplate, output: &mut Vec) -> Result<(), Error> { +fn expand_template_part_literal(l: &LiteralTemplate, output: &mut Vec) -> Result<(), Error> { output.extend_from_slice(l.text); Ok(()) }