From 52077cb61297e7e78a65adac2245ed3abf16bc5c Mon Sep 17 00:00:00 2001 From: Ulrich Hornung <hornunguli@gmx.de> Date: Sun, 7 Jan 2024 15:08:13 +0100 Subject: [PATCH] commit sources of "subst" crate with modifications. --- submodules/subst/.editorconfig | 8 + submodules/subst/CHANGELOG | 25 ++ submodules/subst/Cargo.toml | 31 ++ submodules/subst/LICENSE-APACHE | 201 +++++++++++ submodules/subst/LICENSE-BSD | 24 ++ submodules/subst/README.md | 50 +++ submodules/subst/README.tpl | 9 + submodules/subst/rustfmt.toml | 9 + submodules/subst/src/error.rs | 379 ++++++++++++++++++++ submodules/subst/src/lib.rs | 618 ++++++++++++++++++++++++++++++++ submodules/subst/src/map.rs | 103 ++++++ submodules/subst/src/yaml.rs | 209 +++++++++++ 12 files changed, 1666 insertions(+) create mode 100644 submodules/subst/.editorconfig create mode 100644 submodules/subst/CHANGELOG create mode 100644 submodules/subst/Cargo.toml create mode 100644 submodules/subst/LICENSE-APACHE create mode 100644 submodules/subst/LICENSE-BSD create mode 100644 submodules/subst/README.md create mode 100644 submodules/subst/README.tpl create mode 100644 submodules/subst/rustfmt.toml create mode 100644 submodules/subst/src/error.rs create mode 100644 submodules/subst/src/lib.rs create mode 100644 submodules/subst/src/map.rs create mode 100644 submodules/subst/src/yaml.rs diff --git a/submodules/subst/.editorconfig b/submodules/subst/.editorconfig new file mode 100644 index 00000000000..d1ddcc77d9b --- /dev/null +++ b/submodules/subst/.editorconfig @@ -0,0 +1,8 @@ +root = true + +[*] +indent_style = tab +indent_size = 4 +end_of_line = lf +trim_trailing_whitespace = true +insert_final_newline = true diff --git a/submodules/subst/CHANGELOG b/submodules/subst/CHANGELOG new file mode 100644 index 00000000000..0bb8fe80cf6 --- /dev/null +++ b/submodules/subst/CHANGELOG @@ -0,0 +1,25 @@ +# Version 0.3.0 - 2023-08-31 +- [add][minor] Expose error details publicly.
+- [fix][minor] Fix panic when processing multibyte UTF-8 escape sequence. +- [change][major] Require a valid UTF-8 `&str` for `Error::print_source_highlighting()`. + +# Version 0.2.3 - 2023-07-26 +- [add][minor] Support unsized `VariableMap` objects, such as `&dyn VariableMap`. + +# Version 0.2.2 - 2022-09-19 +- [change][minor] Have `NoSubstitution` return an uninhabited `NeverType` as value. + +# Version 0.2.1 - 2022-09-19 +- [add][minor] Add a `NoSubstitution` map to allow disabling substitution in generic code. + +# Version 0.2.0 - 2022-09-19 +- [change][major] Update `serde_yaml` to version `0.9`. + +# Version 0.1.2 - 2022-03-03 +- [change][minor] Remove use of new features to be compatible with older Rust versions. + +# Version 0.1.1 - 2022-03-01 +- [add][minor] Add support for substitution in all string values of YAML data. + +# Version 0.1.0 - 2022-02-22 +- [add][major] Initial release. diff --git a/submodules/subst/Cargo.toml b/submodules/subst/Cargo.toml new file mode 100644 index 00000000000..af5eb58d628 --- /dev/null +++ b/submodules/subst/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "subst" +description = "shell-like variable substitution" +version = "0.3.0" +license = "BSD-2-Clause OR Apache-2.0" +repository = "https://github.com/fizyr/subst" +documentation = "https://docs.rs/subst" +readme = "README.md" +publish = ["crates-io"] + +keywords = ["substitution", "expansion", "variable", "parameter", "shell"] +categories = ["template-engine", "value-formatting"] + +edition = "2018" + +[features] +yaml = ["serde", "serde_yaml"] + +[dependencies] +memchr = "2.4.1" +serde = { version = "1.0.0", optional = true } +serde_yaml = { version = "0.9.13", optional = true } +unicode-width = "0.1.9" + +[dev-dependencies] +assert2 = "0.3.6" +subst = { path = ".", features = ["yaml"] } +serde = { version = "1.0.0", features = ["derive"] } + +[package.metadata.docs.rs] +all-features = true diff --git a/submodules/subst/LICENSE-APACHE b/submodules/subst/LICENSE-APACHE
new file mode 100644 index 00000000000..16fe87b06e8 --- /dev/null +++ b/submodules/subst/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, 
in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/submodules/subst/LICENSE-BSD b/submodules/subst/LICENSE-BSD new file mode 100644 index 00000000000..ca9b9e42ff6 --- /dev/null +++ b/submodules/subst/LICENSE-BSD @@ -0,0 +1,24 @@ +Copyright 2022, Fizyr B.V. <info@fizyr.com> +Copyright 2022, Maarten de Vries <maarten@de-vri.es> +Copyright 2022, The Contributors + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/submodules/subst/README.md b/submodules/subst/README.md new file mode 100644 index 00000000000..05fa53235c1 --- /dev/null +++ b/submodules/subst/README.md @@ -0,0 +1,50 @@ +# subst + +Shell-like variable substitution for strings and byte strings. + +## Features + +* Perform substitution in `&str` or in `&[u8]`. +* Provide a custom map of variables or use environment variables. +* Short format: `"Hello $name!"` +* Long format: `"Hello ${name}!"` +* Default values: `"Hello ${name:person}!"` +* Recursive substitution in default values: `"${XDG_CONFIG_HOME:$HOME/.config}/my-app/config.toml"` +* Perform substitution on all string values in YAML data (optional, requires the `yaml` feature). + +Variable names can consist of alphanumeric characters and underscores. +They are allowed to start with numbers. + +## Examples + +The [`substitute()`][substitute] function can be used to perform substitution on a `&str`. +The variables can either be a [`HashMap`][std::collections::HashMap] or a [`BTreeMap`][std::collections::BTreeMap]. 
+ +```rust +let mut variables = HashMap::new(); +variables.insert("name", "world"); +assert_eq!(subst::substitute("Hello $name!", &variables)?, "Hello world!"); +``` + +The variables can also be taken directly from the environment with the [`Env`][Env] map. + +```rust +assert_eq!( + subst::substitute("$XDG_CONFIG_HOME/my-app/config.toml", &subst::Env)?, + "/home/user/.config/my-app/config.toml", +); +``` + +Substitution can also be done on byte strings using the [`substitute_bytes()`][substitute_bytes] function. + +```rust +let mut variables = HashMap::new(); +variables.insert("name", b"world"); +assert_eq!(subst::substitute_bytes(b"Hello $name!", &variables)?, b"Hello world!"); +``` + +[substitute]: https://docs.rs/subst/latest/subst/fn.substitute.html +[substitute_bytes]: https://docs.rs/subst/latest/subst/fn.substitute_bytes.html +[Env]: https://docs.rs/subst/latest/subst/struct.Env.html +[std::collections::HashMap]: https://doc.rust-lang.org/stable/std/collections/struct.HashMap.html +[std::collections::BTreeMap]: https://doc.rust-lang.org/stable/std/collections/struct.BTreeMap.html diff --git a/submodules/subst/README.tpl b/submodules/subst/README.tpl new file mode 100644 index 00000000000..56f6054de00 --- /dev/null +++ b/submodules/subst/README.tpl @@ -0,0 +1,9 @@ +# {{crate}} + +{{readme}} + +[substitute]: https://docs.rs/subst/latest/subst/fn.substitute.html +[substitute_bytes]: https://docs.rs/subst/latest/subst/fn.substitute_bytes.html +[Env]: https://docs.rs/subst/latest/subst/struct.Env.html +[std::collections::HashMap]: https://doc.rust-lang.org/stable/std/collections/struct.HashMap.html +[std::collections::BTreeMap]: https://doc.rust-lang.org/stable/std/collections/struct.BTreeMap.html diff --git a/submodules/subst/rustfmt.toml b/submodules/subst/rustfmt.toml new file mode 100644 index 00000000000..4b9795f394e --- /dev/null +++ b/submodules/subst/rustfmt.toml @@ -0,0 +1,9 @@ +hard_tabs = true +tab_spaces = 4 +max_width = 120 +imports_layout = 
"HorizontalVertical" +match_block_trailing_comma = true +overflow_delimited_expr = true +reorder_impl_items = true +unstable_features = true +use_field_init_shorthand = true diff --git a/submodules/subst/src/error.rs b/submodules/subst/src/error.rs new file mode 100644 index 00000000000..c49d6e1579a --- /dev/null +++ b/submodules/subst/src/error.rs @@ -0,0 +1,379 @@ +//! Module containing error details. + +/// An error that can occur during variable substitution. +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum Error { + /// The input string contains an invalid escape sequence. + InvalidEscapeSequence(InvalidEscapeSequence), + + /// The input string contains a variable placeholder without a variable name (`"${}"`). + MissingVariableName(MissingVariableName), + + /// The input string contains an unexpected character. + UnexpectedCharacter(UnexpectedCharacter), + + /// The input string contains an unclosed variable placeholder. + MissingClosingBrace(MissingClosingBrace), + + /// The input string contains a placeholder for a variable that is not in the variable map. 
+ NoSuchVariable(NoSuchVariable), +} + +impl From<InvalidEscapeSequence> for Error { + #[inline] + fn from(other: InvalidEscapeSequence) -> Self { + Self::InvalidEscapeSequence(other) + } +} + +impl From<MissingVariableName> for Error { + #[inline] + fn from(other: MissingVariableName) -> Self { + Self::MissingVariableName(other) + } +} + +impl From<UnexpectedCharacter> for Error { + #[inline] + fn from(other: UnexpectedCharacter) -> Self { + Self::UnexpectedCharacter(other) + } +} + +impl From<MissingClosingBrace> for Error { + #[inline] + fn from(other: MissingClosingBrace) -> Self { + Self::MissingClosingBrace(other) + } +} + +impl From<NoSuchVariable> for Error { + #[inline] + fn from(other: NoSuchVariable) -> Self { + Self::NoSuchVariable(other) + } +} + +impl std::error::Error for Error {} + +impl std::fmt::Display for Error { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::InvalidEscapeSequence(e) => e.fmt(f), + Self::MissingVariableName(e) => e.fmt(f), + Self::UnexpectedCharacter(e) => e.fmt(f), + Self::MissingClosingBrace(e) => e.fmt(f), + Self::NoSuchVariable(e) => e.fmt(f), + } + } +} + +/// A character or byte from the input. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum CharOrByte { + /// A Unicode character. + Char(char), + + /// A byte value. + Byte(u8), +} + +impl CharOrByte { + /// Get the byte length of the character in the source. + /// + /// For [`Self::Char`], this returns the UTF-8 length of the character in bytes. + /// For [`Self::Byte`], this always returns 1. + #[inline] + pub fn source_len(&self) -> usize { + match self { + Self::Char(c) => c.len_utf8(), + Self::Byte(_) => 1, + } + } + + /// Return a printable version of `self` for error messages. + /// + /// The returned value implements `Display` and is suitable for inclusion in error messages.
+ pub fn quoted_printable(&self) -> impl std::fmt::Display { + #[derive(Copy, Clone, Debug)] + struct QuotedPrintable { + inner: CharOrByte, + } + + impl std::fmt::Display for QuotedPrintable { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.inner { + CharOrByte::Char(value) => write!(f, "{value:?}"), + CharOrByte::Byte(value) => { + if value.is_ascii() { + write!(f, "{:?}", char::from(value)) + } else { + write!(f, "'\\x{value:02X}'") + } + }, + } + } + } + + QuotedPrintable { inner: *self } + } +} + +impl std::fmt::Display for CharOrByte { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match *self { + Self::Char(value) => write!(f, "{value}"), + Self::Byte(value) => { + if value.is_ascii() { + write!(f, "{}", char::from(value)) + } else { + write!(f, "0x{value:02X}") + } + }, + } + } +} + +/// The input string contains an invalid escape sequence. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct InvalidEscapeSequence { + /// The byte offset within the input where the error occurs. + /// + /// This points to the associated backslash character in the source text. + pub position: usize, + + /// The character value of the invalid escape sequence. + /// + /// If the unexpected character is not a valid UTF-8 sequence, + /// this will simply hold the value of the first byte after the backslash character. + /// + /// If a backslash character occurs at the end of the input, this field is set to `None`. + pub character: Option<CharOrByte>, +} + +impl std::error::Error for InvalidEscapeSequence {} + +impl std::fmt::Display for InvalidEscapeSequence { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + if let Some(c) = self.character { + write!(f, "Invalid escape sequence: \\{}", c) + } else { + write!(f, "Invalid escape sequence: missing escape character") + } + } +} + +/// The input string contains a variable placeholder without a variable name (`"${}"`). 
+#[derive(Debug, Clone, Eq, PartialEq)] +pub struct MissingVariableName { + /// The byte offset within the input where the error occurs. + /// + /// This points to the `$` sign with a missing variable name in the input text. + pub position: usize, + + /// The length of the variable placeholder in bytes. + pub len: usize, +} + +impl std::error::Error for MissingVariableName {} + +impl std::fmt::Display for MissingVariableName { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Missing variable name") + } +} + +/// The input string contains an unexpected character. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct UnexpectedCharacter { + /// The byte offset within the input where the error occurs. + /// + /// This points to the unexpected character in the input text. + pub position: usize, + + /// The unexpected character. + /// + /// If the unexpected character is not a valid UTF-8 sequence, + /// this will simply hold the value of the unexpected byte. + pub character: CharOrByte, + + /// A human readable message about what was expected instead. + pub expected: ExpectedCharacter, +} + +impl std::error::Error for UnexpectedCharacter {} + +impl std::fmt::Display for UnexpectedCharacter { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "Unexpected character: {}, expected {}", + self.character.quoted_printable(), + self.expected.message() + ) + } +} + +/// A struct to describe what was expected instead of the unexpected character. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct ExpectedCharacter { + /// A human readable message to describe what is expected. + pub(crate) message: &'static str, +} + +impl ExpectedCharacter { + /// Get a human readable message to describe what was expected. + #[inline] + pub fn message(&self) -> &str { + self.message + } +} + +/// The input string contains an unclosed variable placeholder. 
+#[derive(Debug, Clone, Eq, PartialEq)] +pub struct MissingClosingBrace { + /// The byte offset within the input where the error occurs. + /// + /// This points to the `{` character that is missing a closing brace. + pub position: usize, +} + +impl std::error::Error for MissingClosingBrace {} + +impl std::fmt::Display for MissingClosingBrace { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Missing closing brace") + } +} + +/// The input string contains a placeholder for a variable that is not in the variable map. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct NoSuchVariable { + /// The byte offset within the input where the error occurs. + /// + /// This points to the first character of the name in the input text. + pub position: usize, + + /// The name of the variable. + pub name: String, +} + +impl std::error::Error for NoSuchVariable {} + +impl std::fmt::Display for NoSuchVariable { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "No such variable: ${}", self.name) + } +} + +impl Error { + /// Get the range in the source text that contains the error. + #[inline] + pub fn source_range(&self) -> std::ops::Range<usize> { + let (start, len) = match &self { + Self::InvalidEscapeSequence(e) => { + let char_len = e.character.map(|x| x.source_len()).unwrap_or(0); + (e.position, 1 + char_len) + }, + Self::MissingVariableName(e) => (e.position, e.len), + Self::UnexpectedCharacter(e) => (e.position, e.character.source_len()), + Self::MissingClosingBrace(e) => (e.position, 1), + Self::NoSuchVariable(e) => (e.position, e.name.len()), + }; + std::ops::Range { + start, + end: start + len, + } + } + + /// Get the line of source that contains the error. + /// + /// # Panics + /// May panic if the source text is not the original source that contains the error. 
+ #[inline] + pub fn source_line<'a>(&self, source: &'a str) -> &'a str { + let position = self.source_range().start; + let start = line_start(source, position); + let end = line_end(source, position); + &source[start..end] + } + + /// Write source highlighting for the error location. + /// + /// The highlighting ends with a newline. + /// + /// Note: this function doesn't print anything if the source line exceeds 60 characters in width. + /// For more control over this behaviour, consider using [`Self::source_range()`] and [`Self::source_line()`] instead. + #[inline] + pub fn write_source_highlighting(&self, f: &mut impl std::fmt::Write, source: &str) -> std::fmt::Result { + use unicode_width::UnicodeWidthStr; + + let range = self.source_range(); + let line = self.source_line(source); + if line.width() > 60 { + return Ok(()); + } + write!(f, " {}\n ", line)?; + write_underline(f, line, range)?; + writeln!(f) + } + + /// Get source highlighting for the error location as a string. + /// + /// The highlighting ends with a newline. + /// + /// Note: this function returns an empty string if the source line exceeds 60 characters in width. 
+ #[inline] + pub fn source_highlighting(&self, source: &str) -> String { + let mut output = String::new(); + self.write_source_highlighting(&mut output, source).unwrap(); + output + } +} + +fn line_start(source: &str, position: usize) -> usize { + match source[..position].rfind(|c| c == '\n' || c == '\r') { + Some(line_end) => line_end + 1, + None => 0, + } +} + +fn line_end(source: &str, position: usize) -> usize { + match source[position..].find(|c| c == '\n' || c == '\r') { + Some(line_end) => position + line_end, + None => source.len(), + } +} + +fn write_underline(f: &mut impl std::fmt::Write, line: &str, range: std::ops::Range<usize>) -> std::fmt::Result { + use unicode_width::UnicodeWidthStr; + let spaces = line[..range.start].width(); + let carets = line[range].width(); + write!(f, "{}", " ".repeat(spaces))?; + write!(f, "{}", "^".repeat(carets))?; + Ok(()) +} + +#[cfg(test)] +mod test { + use assert2::check; + + #[test] + fn test_char_or_byte_quoated_printable() { + use super::CharOrByte::{Byte, Char}; + check!(Byte(0x81).quoted_printable().to_string() == r"'\x81'"); + check!(Byte(0x79).quoted_printable().to_string() == r"'y'"); + check!(Char('\x79').quoted_printable().to_string() == r"'y'"); + + check!(Byte(0).quoted_printable().to_string() == r"'\0'"); + check!(Char('\0').quoted_printable().to_string() == r"'\0'"); + } +} diff --git a/submodules/subst/src/lib.rs b/submodules/subst/src/lib.rs new file mode 100644 index 00000000000..e9c82f4816a --- /dev/null +++ b/submodules/subst/src/lib.rs @@ -0,0 +1,618 @@ +//! Shell-like variable substitution for strings and byte strings. +//! +//! # Features +//! +//! * Perform substitution in `&str` or in `&[u8]`. +//! * Provide a custom map of variables or use environment variables. +//! * Short format: `"Hello $name!"` +//! * Long format: `"Hello ${name}!"` +//! * Default values: `"Hello ${name:person}!"` +//! * Recursive substitution in default values: `"${XDG_CONFIG_HOME:$HOME/.config}/my-app/config.toml"` +//! 
* Perform substitution on all string values in YAML data (optional, requires the `yaml` feature). +//! +//! Variable names can consist of alphanumeric characters and underscores. +//! They are allowed to start with numbers. +//! +//! # Examples +//! +//! The [`substitute()`][substitute] function can be used to perform substitution on a `&str`. +//! The variables can either be a [`HashMap`][std::collections::HashMap] or a [`BTreeMap`][std::collections::BTreeMap]. +//! +//! ``` +//! # fn main() -> Result<(), subst::Error> { +//! # use std::collections::HashMap; +//! let mut variables = HashMap::new(); +//! variables.insert("name", "world"); +//! assert_eq!(subst::substitute("Hello $name!", &variables)?, "Hello world!"); +//! # Ok(()) +//! # } +//! ``` +//! +//! The variables can also be taken directly from the environment with the [`Env`][Env] map. +//! +//! ``` +//! # fn main() -> Result<(), subst::Error> { +//! # std::env::set_var("XDG_CONFIG_HOME", "/home/user/.config"); +//! assert_eq!( +//! subst::substitute("$XDG_CONFIG_HOME/my-app/config.toml", &subst::Env)?, +//! "/home/user/.config/my-app/config.toml", +//! ); +//! # Ok(()) +//! # } +//! ``` +//! +//! Substitution can also be done on byte strings using the [`substitute_bytes()`][substitute_bytes] function. +//! +//! ``` +//! # fn main() -> Result<(), subst::Error> { +//! # use std::collections::HashMap; +//! let mut variables = HashMap::new(); +//! variables.insert("name", b"world"); +//! assert_eq!(subst::substitute_bytes(b"Hello $name!", &variables)?, b"Hello world!"); +//! # Ok(()) +//! # } +//! ``` +#![warn(missing_docs, missing_debug_implementations)] + +pub mod error; +pub use error::Error; + +mod map; +pub use map::*; + +#[cfg(feature = "yaml")] +pub mod yaml; + +/// Substitute variables in a string. +/// +/// Variables have the form `$NAME`, `${NAME}` or `${NAME:default}`. +/// A variable name can only consist of ASCII letters, digits and underscores. +/// They are allowed to start with numbers. 
+/// +/// You can escape dollar signs, backslashes, colons and braces with a backslash. +/// +/// You can pass either a [`HashMap`][std::collections::HashMap], [`BTreeMap`][std::collections::BTreeMap] or [`Env`] as the `variables` parameter. +/// The maps must have [`&str`] or [`String`] keys, and the values must be [`AsRef<str>`]. +pub fn substitute<'a, M>(source: &str, variables: &'a M) -> Result<String, Error> +where + M: VariableMap<'a> + ?Sized, + M::Value: AsRef<str>, +{ + let mut output = Vec::with_capacity(source.len() + source.len() / 10); + substitute_impl(&mut output, source.as_bytes(), 0..source.len(), variables, &|x| { + x.as_ref().as_bytes() + })?; + // SAFETY: Both source and all variable values are valid UTF-8, so substitation result is also valid UTF-8. + unsafe { Ok(String::from_utf8_unchecked(output)) } +} + +/// Does one sub-step of substitute +/// +/// Returns Some((replacement_string, next_position_in_source_str_after_variable_name)) when succeeded. +/// Returns None if no substitution was possible. +pub fn substitute_one_step<'a, M>(source: &str, variables: &'a M) -> Result<Option<(String, usize)>, Error> +where + M: VariableMap<'a> + ?Sized, + M::Value: AsRef<str>, +{ + let mut output = Vec::with_capacity(source.len() + source.len() / 10); + let new_finger_option = + substitute_impl_one_step(0, &mut output, source.as_bytes(), &(0..source.len()), variables, &|x| { + x.as_ref().as_bytes() + })?; + + if new_finger_option.is_none() { + return Ok(None); + } + + // SAFETY: Both source and all variable values are valid UTF-8, so substitation result is also valid UTF-8. + let output_str = unsafe { String::from_utf8_unchecked(output) }; + + Ok(Some((output_str, new_finger_option.unwrap()))) +} + +/// Substitute variables in a byte string. +/// +/// Variables have the form `$NAME`, `${NAME}` or `${NAME:default}`. +/// A variable name can only consist of ASCII letters, digits and underscores. +/// They are allowed to start with numbers. 
+/// +/// You can escape dollar signs, backslashes, colons and braces with a backslash. +/// +/// You can pass either a [`HashMap`][std::collections::HashMap], [`BTreeMap`][std::collections::BTreeMap] as the `variables` parameter. +/// The maps must have [`&str`] or [`String`] keys, and the values must be [`AsRef<[u8]>`]. +/// On Unix platforms, you can also use [`EnvBytes`]. +pub fn substitute_bytes<'a, M>(source: &[u8], variables: &'a M) -> Result<Vec<u8>, Error> +where + M: VariableMap<'a> + ?Sized, + M::Value: AsRef<[u8]>, +{ + let mut output = Vec::with_capacity(source.len() + source.len() / 10); + substitute_impl(&mut output, source, 0..source.len(), variables, &|x| x.as_ref())?; + Ok(output) +} + +fn substitute_impl_one_step<'a, M, F>( + finger: usize, + output: &mut Vec<u8>, + source: &[u8], + range: &std::ops::Range<usize>, + variables: &'a M, + to_bytes: &F, +) -> Result<Option<usize>, Error> +where + M: VariableMap<'a> + ?Sized, + F: Fn(&M::Value) -> &[u8], +{ + let next = match memchr::memchr2(b'$', b'\\', &source[finger..range.end]) { + Some(x) => finger + x, + None => return Ok(None), + }; + + output.extend_from_slice(&source[finger..next]); + if source[next] == b'\\' { + output.push(unescape_one(source, next)?); + Ok(Some(next + 2)) + } else { + let variable = parse_variable(source, next)?; + let value = variables.get(variable.name); + match (&value, &variable.default) { + (None, None) => { + return Err(error::NoSuchVariable { + position: variable.name_start, + name: variable.name.to_owned(), + } + .into()) + }, + (Some(value), _) => { + output.extend_from_slice(to_bytes(value)); + }, + (None, Some(default)) => { + substitute_impl(output, source, default.clone(), variables, to_bytes)?; + }, + }; + Ok(Some(variable.end_position)) + } +} + +/// Substitute variables in a byte string. +/// +/// This is the real implementation used by both [`substitute`] and [`substitute_bytes`]. 
+/// The function accepts any type that implements [`VariableMap`], and a function to convert the value from the map into bytes. +fn substitute_impl<'a, M, F>( + output: &mut Vec<u8>, + source: &[u8], + range: std::ops::Range<usize>, + variables: &'a M, + to_bytes: &F, +) -> Result<(), Error> +where + M: VariableMap<'a> + ?Sized, + F: Fn(&M::Value) -> &[u8], +{ + let mut finger = range.start; + while finger < range.end { + let new_finger_option = substitute_impl_one_step(finger, output, source, &range, variables, to_bytes)?; + if let Some(new_finger) = new_finger_option { + finger = new_finger; + } else { + break; + } + } + + output.extend_from_slice(&source[finger..range.end]); + Ok(()) +} + +/// A parsed variable. +#[derive(Debug)] +struct Variable<'a> { + /// The name of the variable. + name: &'a str, + + /// The start position of the name in the source. + name_start: usize, + + /// The default value of the variable. + default: Option<std::ops::Range<usize>>, + + /// The end position of the entire variable in the source. + end_position: usize, +} + +/// Parse a variable from source at the given position. +/// +/// The finger must be the position of the dollar sign in the source. +fn parse_variable(source: &[u8], finger: usize) -> Result<Variable, Error> { + if finger == source.len() { + return Err(error::MissingVariableName { + position: finger, + len: 1, + } + .into()); + } + if source[finger + 1] == b'{' { + parse_braced_variable(source, finger) + } else { + let name_start = finger + 1; + check_fist_char_of_variable_name(source, name_start)?; + let name_end = match source[name_start..] 
+ .iter() + .position(|&c| !c.is_ascii_alphanumeric() && c != b'_') + { + Some(0) => { + return Err(error::MissingVariableName { + position: finger, + len: 1, + } + .into()) + }, + Some(x) => finger + 1 + x, + None => source.len(), + }; + Ok(Variable { + name: std::str::from_utf8(&source[finger + 1..name_end]).unwrap(), + name_start: finger + 1, + default: None, + end_position: name_end, + }) + } +} + +fn check_fist_char_of_variable_name(source: &[u8], name_start: usize) -> Result<(), Error> { + let first_char = source[name_start]; + if first_char.is_ascii_digit() { + return Err(error::Error::UnexpectedCharacter(error::UnexpectedCharacter { + position: name_start, + character: error::CharOrByte::Byte(first_char), + expected: error::ExpectedCharacter { + message: "variable name must not start with 0..9", + }, + })); + } + + Ok(()) +} + +/// Parse a braced variable in the form of "${name[:default]} from source at the given position. +/// +/// The finger must be the position of the dollar sign in the source. +fn parse_braced_variable(source: &[u8], finger: usize) -> Result<Variable, Error> { + let name_start = finger + 2; + if name_start >= source.len() { + return Err(error::MissingVariableName { + position: finger, + len: 2, + } + .into()); + } + + check_fist_char_of_variable_name(source, name_start)?; + + // Get the first sequence of alphanumeric characters and underscores for the variable name. + let name_end = match source[name_start..] + .iter() + .position(|&c| !c.is_ascii_alphanumeric() && c != b'_') + { + Some(0) => { + return Err(error::MissingVariableName { + position: finger, + len: 2, + } + .into()) + }, + Some(x) => name_start + x, + None => source.len(), + }; + + // If the name extends to the end, we're missing a closing brace. + if name_end == source.len() { + return Err(error::MissingClosingBrace { position: finger + 1 }.into()); + } + + // If there is a closing brace after the name, there is no default value and we're done. 
/// Get the longest prefix of the input that is valid UTF-8, as a [`str`].
///
/// Returns the whole input if all of it is valid UTF-8,
/// and an empty string if the very first byte is already invalid.
fn valid_utf8_prefix(input: &[u8]) -> &str {
	match std::str::from_utf8(input) {
		Ok(text) => text,
		Err(utf8_error) => {
			let valid = &input[..utf8_error.valid_up_to()];
			// The unwrap can not panic: `valid_up_to()` marks the end of the valid UTF-8 data.
			std::str::from_utf8(valid).unwrap()
		},
	}
}
+fn get_maybe_char_at(data: &[u8], index: usize) -> error::CharOrByte { + let head = &data[index..]; + let head = &head[..head.len().min(4)]; + assert!( + !head.is_empty(), + "index out of bounds: data.len() is {} but index is {}", + data.len(), + index + ); + + let head = valid_utf8_prefix(head); + if let Some(c) = head.chars().next() { + error::CharOrByte::Char(c) + } else { + error::CharOrByte::Byte(data[index]) + } +} + +/// Find the first non-escaped occurrence of a character. +fn find_non_escaped(needle: u8, haystack: &[u8]) -> Option<usize> { + let mut finger = 0; + while finger < haystack.len() { + let candidate = memchr::memchr2(b'\\', needle, &haystack[finger..])?; + if haystack[finger + candidate] == b'\\' { + if candidate == haystack.len() - 1 { + return None; + } + finger += candidate + 2; + } else { + return Some(finger + candidate); + } + } + None +} + +/// Unescape a single escape sequence in source at the given position. +/// +/// The `position` must point to the backslash character in the source text. +/// +/// Only valid escape sequences ('\$' '\{' '\}' and '\:') are accepted. +/// Invalid escape sequences cause an error to be returned. 
+fn unescape_one(source: &[u8], position: usize) -> Result<u8, Error> { + if position == source.len() - 1 { + return Err(error::InvalidEscapeSequence { + position, + character: None, + } + .into()); + } + match source[position + 1] { + b'\\' => Ok(b'\\'), + b'$' => Ok(b'$'), + b'{' => Ok(b'{'), + b'}' => Ok(b'}'), + b':' => Ok(b':'), + _ => Err(error::InvalidEscapeSequence { + position, + character: Some(get_maybe_char_at(source, position + 1)), + } + .into()), + } +} + +#[cfg(test)] +mod test { + use super::*; + use assert2::{assert, check, let_assert}; + use std::collections::BTreeMap; + + #[test] + fn test_get_maybe_char_at() { + use error::CharOrByte::{Byte, Char}; + assert!(get_maybe_char_at(b"hello", 0) == Char('h')); + assert!(get_maybe_char_at(b"he", 0) == Char('h')); + assert!(get_maybe_char_at(b"hello", 1) == Char('e')); + assert!(get_maybe_char_at(b"he", 1) == Char('e')); + assert!(get_maybe_char_at(b"hello\x80", 1) == Char('e')); + assert!(get_maybe_char_at(b"he\x80llo\x80", 1) == Char('e')); + + assert!(get_maybe_char_at(b"h\x79", 1) == Char('\x79')); + assert!(get_maybe_char_at(b"h\x80llo", 1) == Byte(0x80)); + + // The UTF-8 sequence for '❤' is [0xE2, 0x9D, 0xA4]". 
+ assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 0) == Char('h')); + assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 1) == Char('❤')); + assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 2) == Byte(0x9d)); + assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 3) == Byte(0xA4)); + assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 4) == Char('l')); + assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 5) == Char('l')); + assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 6) == Char('❤')); + assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 7) == Byte(0x9d)); + assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 8) == Byte(0xA4)); + } + + #[test] + fn test_find_non_escaped() { + check!(find_non_escaped(b'$', b"$foo") == Some(0)); + check!(find_non_escaped(b'$', b"\\$foo$") == Some(5)); + check!(find_non_escaped(b'$', b"foo $bar") == Some(4)); + check!(find_non_escaped(b'$', b"foo \\$$bar") == Some(6)); + } + + #[test] + fn test_substitute() { + let mut map: BTreeMap<String, String> = BTreeMap::new(); + map.insert("name".into(), "world".into()); + check!(let Ok("Hello world!") = substitute("Hello $name!", &map).as_deref()); + check!(let Ok("Hello world!") = substitute("Hello ${name}!", &map).as_deref()); + check!(let Ok("Hello world!") = substitute("Hello ${name:not-world}!", &map).as_deref()); + check!(let Ok("Hello world!") = substitute("Hello ${not_name:world}!", &map).as_deref()); + + let mut map: BTreeMap<&str, &str> = BTreeMap::new(); + map.insert("name", "world"); + check!(let Ok("Hello world!") = substitute("Hello $name!", &map).as_deref()); + check!(let Ok("Hello world!") = substitute("Hello ${name}!", &map).as_deref()); + check!(let Ok("Hello world!") = substitute("Hello ${name:not-world}!", &map).as_deref()); + check!(let Ok("Hello world!") = substitute("Hello ${not_name:world}!", &map).as_deref()); + } + + #[test] + fn substitution_in_default_value() { + let mut map: BTreeMap<String, String> = BTreeMap::new(); + map.insert("name".into(), "world".into()); + check!(let Ok("Hello cruel world!") = 
substitute("Hello ${not_name:cruel $name}!", &map).as_deref()); + } + + #[test] + fn test_substitute_bytes() { + let mut map: BTreeMap<String, Vec<u8>> = BTreeMap::new(); + map.insert("name".into(), b"world"[..].into()); + check!(let Ok(b"Hello world!") = substitute_bytes(b"Hello $name!", &map).as_deref()); + check!(let Ok(b"Hello world!") = substitute_bytes(b"Hello ${name}!", &map).as_deref()); + check!(let Ok(b"Hello world!") = substitute_bytes(b"Hello ${name:not-world}!", &map).as_deref()); + check!(let Ok(b"Hello world!") = substitute_bytes(b"Hello ${not_name:world}!", &map).as_deref()); + + let mut map: BTreeMap<&str, &[u8]> = BTreeMap::new(); + map.insert("name", b"world"); + check!(let Ok(b"Hello world!") = substitute_bytes(b"Hello $name!", &map).as_deref()); + check!(let Ok(b"Hello world!") = substitute_bytes(b"Hello ${name}!", &map).as_deref()); + check!(let Ok(b"Hello world!") = substitute_bytes(b"Hello ${name:not-world}!", &map).as_deref()); + check!(let Ok(b"Hello world!") = substitute_bytes(b"Hello ${not_name:world}!", &map).as_deref()); + } + + #[test] + fn test_invalid_escape_sequence() { + let map: BTreeMap<String, String> = BTreeMap::new(); + + let source = r"Hello \world!"; + let_assert!(Err(e) = substitute(source, &map)); + assert!(e.to_string() == r"Invalid escape sequence: \w"); + assert!(e.source_highlighting(source) == concat!(r" Hello \world!", "\n", r" ^^", "\n",)); + + let source = r"Hello \❤❤"; + let_assert!(Err(e) = substitute(source, &map)); + assert!(e.to_string() == r"Invalid escape sequence: \❤"); + assert!(e.source_highlighting(source) == concat!(r" Hello \❤❤", "\n", r" ^^", "\n",)); + + let source = r"Hello world!\"; + let_assert!(Err(e) = substitute(source, &map)); + assert!(e.to_string() == r"Invalid escape sequence: missing escape character"); + assert!(e.source_highlighting(source) == concat!(r" Hello world!\", "\n", r" ^", "\n",)); + } + + #[test] + fn test_missing_variable_name() { + let map: BTreeMap<String, String> = 
BTreeMap::new(); + + let source = r"Hello $!"; + let_assert!(Err(e) = substitute(source, &map)); + assert!(e.to_string() == r"Missing variable name"); + assert!(e.source_highlighting(source) == concat!(r" Hello $!", "\n", r" ^", "\n",)); + + let source = r"Hello ${}!"; + let_assert!(Err(e) = substitute(source, &map)); + assert!(e.to_string() == r"Missing variable name"); + assert!(e.source_highlighting(source) == concat!(r" Hello ${}!", "\n", r" ^^", "\n",)); + + let source = r"Hello ${:fallback}!"; + let_assert!(Err(e) = substitute(source, &map)); + assert!(e.to_string() == r"Missing variable name"); + assert!(e.source_highlighting(source) == concat!(r" Hello ${:fallback}!", "\n", r" ^^", "\n",)); + + let source = r"Hello $❤"; + let_assert!(Err(e) = substitute(source, &map)); + assert!(e.to_string() == r"Missing variable name"); + assert!(e.source_highlighting(source) == concat!(r" Hello $❤", "\n", r" ^", "\n",)); + } + + #[test] + fn test_unexpected_character() { + let map: BTreeMap<String, String> = BTreeMap::new(); + + let source = "Hello ${name)!"; + let_assert!(Err(e) = substitute(source, &map)); + assert!(e.to_string() == "Unexpected character: ')', expected a closing brace ('}') or colon (':')"); + assert!(e.source_highlighting(source) == concat!(" Hello ${name)!\n", " ^\n",)); + + let source = "Hello ${name❤"; + let_assert!(Err(e) = substitute(source, &map)); + assert!(e.to_string() == "Unexpected character: '❤', expected a closing brace ('}') or colon (':')"); + assert!(e.source_highlighting(source) == concat!(" Hello ${name❤\n", " ^\n",)); + + let source = b"\xE2\x98Hello ${name\xE2\x98"; + let_assert!(Err(e) = substitute_bytes(source, &map)); + assert!(e.to_string() == "Unexpected character: '\\xE2', expected a closing brace ('}') or colon (':')"); + } + + #[test] + fn test_missing_closing_brace() { + let map: BTreeMap<String, String> = BTreeMap::new(); + + let source = "Hello ${name"; + let_assert!(Err(e) = substitute(source, &map)); + 
assert!(e.to_string() == "Missing closing brace"); + assert!(e.source_highlighting(source) == concat!(" Hello ${name\n", " ^\n",)); + + let source = "Hello ${name:fallback"; + let_assert!(Err(e) = substitute(source, &map)); + assert!(e.to_string() == "Missing closing brace"); + assert!(e.source_highlighting(source) == concat!(" Hello ${name:fallback\n", " ^\n",)); + } + + #[test] + fn test_substitute_no_such_variable() { + let map: BTreeMap<String, String> = BTreeMap::new(); + + let source = "Hello ${name}!"; + let_assert!(Err(e) = substitute(source, &map)); + assert!(e.to_string() == "No such variable: $name"); + assert!(e.source_highlighting(source) == concat!(" Hello ${name}!\n", " ^^^^\n",)); + + let source = "Hello $name!"; + let_assert!(Err(e) = substitute(source, &map)); + assert!(e.to_string() == "No such variable: $name"); + assert!(e.source_highlighting(source) == concat!(" Hello $name!\n", " ^^^^\n",)); + } + + #[test] + fn test_dyn_variable_map() { + let mut variables = BTreeMap::new(); + variables.insert(String::from("aap"), String::from("noot")); + let variables: &dyn VariableMap<Value = &String> = &variables; + + let_assert!(Ok(expanded) = substitute("one ${aap}", variables)); + assert!(expanded == "one noot"); + } + + #[test] + fn test_unicode_invalid_escape_sequence() { + let mut variables = BTreeMap::new(); + variables.insert(String::from("aap"), String::from("noot")); + + let source = r"emoticon: \( ^▽^ )/"; + let_assert!(Err(e) = substitute(source, &variables)); + assert!(e.source_highlighting(source) == concat!(r" emoticon: \( ^▽^ )/", "\n", r" ^^^", "\n",)); + } +} diff --git a/submodules/subst/src/map.rs b/submodules/subst/src/map.rs new file mode 100644 index 00000000000..da2d090c788 --- /dev/null +++ b/submodules/subst/src/map.rs @@ -0,0 +1,103 @@ +use std::collections::{BTreeMap, HashMap}; +use std::hash::BuildHasher; + +/// Trait for types that can be used as a variable map. 
/// Trait for types that can be used as a variable map.
pub trait VariableMap<'a> {
	/// The type returned by the [`get()`][Self::get] function.
	type Value;

	/// Look up the value for `key`, returning `None` if the map has no such variable.
	fn get(&'a self, key: &str) -> Option<Self::Value>;
}

/// A "map" that has no variables at all: every lookup returns `None`.
#[derive(Debug)]
pub struct NoSubstitution;

impl<'a> VariableMap<'a> for NoSubstitution {
	type Value = NeverValue;

	#[inline]
	fn get(&'a self, _key: &str) -> Option<Self::Value> {
		Option::None
	}
}

/// Value type of the [`NoSubstitution`] map.
///
/// The enum has no variants, so no value of this type can ever exist.
#[derive(Debug)]
pub enum NeverValue {}

impl<T> AsRef<T> for NeverValue
where
	T: ?Sized,
{
	#[inline]
	fn as_ref(&self) -> &T {
		// There are no variants to match: this function can never be called.
		match *self {}
	}
}

/// A map that gives strings from the environment.
#[derive(Debug)]
pub struct Env;

impl<'a> VariableMap<'a> for Env {
	type Value = String;

	#[inline]
	fn get(&'a self, key: &str) -> Option<Self::Value> {
		// Any failure to read the variable is reported as "no such variable".
		match std::env::var(key) {
			Ok(value) => Some(value),
			Err(_) => None,
		}
	}
}

/// A map that gives byte strings from the environment.
///
/// Only available on Unix platforms.
#[cfg(unix)]
#[derive(Debug)]
pub struct EnvBytes;

#[cfg(unix)]
impl<'a> VariableMap<'a> for EnvBytes {
	type Value = Vec<u8>;

	#[inline]
	fn get(&'a self, key: &str) -> Option<Self::Value> {
		use std::os::unix::ffi::OsStringExt;
		std::env::var_os(key).map(OsStringExt::into_vec)
	}
}

impl<'a, V> VariableMap<'a> for BTreeMap<&str, V>
where
	V: 'a,
{
	type Value = &'a V;

	#[inline]
	fn get(&'a self, key: &str) -> Option<Self::Value> {
		// Explicitly call the inherent map lookup, not this trait method.
		BTreeMap::get(self, key)
	}
}

impl<'a, V> VariableMap<'a> for BTreeMap<String, V>
where
	V: 'a,
{
	type Value = &'a V;

	#[inline]
	fn get(&'a self, key: &str) -> Option<Self::Value> {
		// Explicitly call the inherent map lookup, not this trait method.
		BTreeMap::get(self, key)
	}
}

impl<'a, V, S> VariableMap<'a> for HashMap<&str, V, S>
where
	V: 'a,
	S: BuildHasher,
{
	type Value = &'a V;

	#[inline]
	fn get(&'a self, key: &str) -> Option<Self::Value> {
		// Explicitly call the inherent map lookup, not this trait method.
		HashMap::get(self, key)
	}
}

impl<'a, V, S> VariableMap<'a> for HashMap<String, V, S>
where
	V: 'a,
	S: BuildHasher,
{
	type Value = &'a V;

	#[inline]
	fn get(&'a self, key: &str) -> Option<Self::Value> {
		// Explicitly call the inherent map lookup, not this trait method.
		HashMap::get(self, key)
	}
}
+} + +/// Parse a struct from YAML data, after perfoming variable substitution on string values. +/// +/// This function first parses the data into a [`serde_yaml::Value`], +/// then performs variable substitution on all string values, +/// and then parses it further into the desired type. +pub fn from_str<'a, T: DeserializeOwned, M>(data: &str, variables: &'a M) -> Result<T, Error> +where + M: VariableMap<'a> + ?Sized, + M::Value: AsRef<str>, +{ + let mut value: serde_yaml::Value = serde_yaml::from_str(data)?; + substitute_string_values(&mut value, variables)?; + Ok(serde_yaml::from_value(value)?) +} + +/// Perform variable substitution on string values of a YAML value. +pub fn substitute_string_values<'a, M>(value: &mut serde_yaml::Value, variables: &'a M) -> Result<(), crate::Error> +where + M: VariableMap<'a> + ?Sized, + M::Value: AsRef<str>, +{ + visit_string_values(value, |value| { + *value = crate::substitute(value.as_str(), variables)?; + Ok(()) + }) +} + +/// Error for parsing YAML with variable substitution. +#[derive(Debug)] +pub enum Error { + /// An error occured while parsing YAML. + Yaml(serde_yaml::Error), + + /// An error occured while performing variable substitution. + Subst(crate::Error), +} + +impl From<serde_yaml::Error> for Error { + #[inline] + fn from(other: serde_yaml::Error) -> Self { + Self::Yaml(other) + } +} + +impl From<crate::Error> for Error { + #[inline] + fn from(other: crate::Error) -> Self { + Self::Subst(other) + } +} + +impl std::error::Error for Error {} + +impl std::fmt::Display for Error { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::Yaml(e) => std::fmt::Display::fmt(e, f), + Error::Subst(e) => std::fmt::Display::fmt(e, f), + } + } +} + +/// Recursively apply a function to all string values in a YAML value. 
+fn visit_string_values<F, E>(value: &mut serde_yaml::Value, fun: F) -> Result<(), E> +where + F: Copy + Fn(&mut String) -> Result<(), E>, +{ + match value { + serde_yaml::Value::Null => Ok(()), + serde_yaml::Value::Bool(_) => Ok(()), + serde_yaml::Value::Number(_) => Ok(()), + serde_yaml::Value::String(val) => fun(val), + serde_yaml::Value::Tagged(tagged) => visit_string_values(&mut tagged.value, fun), + serde_yaml::Value::Sequence(seq) => { + for value in seq { + visit_string_values(value, fun)?; + } + Ok(()) + }, + serde_yaml::Value::Mapping(map) => { + for (_key, value) in map.iter_mut() { + visit_string_values(value, fun)?; + } + Ok(()) + }, + } +} + +#[cfg(test)] +mod test { + use std::collections::HashMap; + + use super::*; + use assert2::{assert, let_assert}; + + #[test] + fn test_from_str() { + #[derive(Debug, serde::Deserialize)] + struct Struct { + bar: String, + baz: String, + } + + let mut variables = HashMap::new(); + variables.insert("bar", "aap"); + variables.insert("baz", "noot"); + let_assert!(Ok(parsed) = from_str(concat!("bar: $bar\n", "baz: $baz/with/stuff\n",), &variables,)); + + let parsed: Struct = parsed; + assert!(parsed.bar == "aap"); + assert!(parsed.baz == "noot/with/stuff"); + } + + #[test] + fn test_from_str_no_substitution() { + #[derive(Debug, serde::Deserialize)] + struct Struct { + bar: String, + baz: String, + } + + let mut variables = HashMap::new(); + variables.insert("bar", "aap"); + variables.insert("baz", "noot"); + let_assert!(Ok(parsed) = from_str(concat!("bar: aap\n", "baz: noot/with/stuff\n",), &crate::NoSubstitution,)); + + let parsed: Struct = parsed; + assert!(parsed.bar == "aap"); + assert!(parsed.baz == "noot/with/stuff"); + } + + #[test] + fn test_yaml_in_var_is_not_parsed() { + #[derive(Debug, serde::Deserialize)] + struct Struct { + bar: String, + baz: String, + } + + let mut variables = HashMap::new(); + variables.insert("bar", "aap\nbaz: mies"); + variables.insert("baz", "noot"); + let_assert!(Ok(parsed) = 
from_str(concat!("bar: $bar\n", "baz: $baz\n",), &variables,)); + + let parsed: Struct = parsed; + assert!(parsed.bar == "aap\nbaz: mies"); + assert!(parsed.baz == "noot"); + } + + #[test] + fn test_tagged_values_are_substituted() { + #[derive(Debug, serde::Deserialize)] + struct Struct { + bar: String, + baz: String, + } + + let mut variables = HashMap::new(); + variables.insert("bar", "aap\nbaz: mies"); + variables.insert("baz", "noot"); + let_assert!(Ok(parsed) = from_str(concat!("bar: !!string $bar\n", "baz: $baz\n",), &variables,)); + + let parsed: Struct = parsed; + assert!(parsed.bar == "aap\nbaz: mies"); + assert!(parsed.baz == "noot"); + } + + #[test] + fn test_dyn_variable_map() { + #[derive(Debug, serde::Deserialize)] + struct Struct { + bar: String, + baz: String, + } + + let mut variables = HashMap::new(); + variables.insert("bar", "aap"); + variables.insert("baz", "noot"); + let variables: &dyn VariableMap<Value = &&str> = &variables; + + let_assert!(Ok(parsed) = from_str(concat!("bar: $bar\n", "baz: $baz/with/stuff\n",), variables,)); + + let parsed: Struct = parsed; + assert!(parsed.bar == "aap"); + assert!(parsed.baz == "noot/with/stuff"); + } +}