From 058580680964618c486174d5ed2fdddb9890b267 Mon Sep 17 00:00:00 2001 From: Yuval Kogman Date: Mon, 21 Oct 2024 21:10:36 +0200 Subject: [PATCH] Specify escaped characters more conservatively MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit '#' is not in the set qchar indirectly defined in BIP 21, and therefore should be escaped. [BIP 21](https://github.com/bitcoin/bips/blob/master/bip-0021.mediawiki#abnf-grammar): > labelparam = "label=" *qchar > messageparam = "message=" *qchar > otherparam = qchar *qchar [ "=" *qchar ] ... > Here, "qchar" corresponds to valid characters of an RFC 3986 URI query > component, excluding the "=" and "&" characters, which this BIP takes > as separators. [RFC 3986 § 3.4](https://www.rfc-editor.org/rfc/rfc3986#section-3.4): > The query component is indicated by the first question mark ("?") > character and terminated by a number sign ("#") character or by the > end of the URI. [RFC 3986 Appendix A](https://www.rfc-editor.org/rfc/rfc3986#appendix-A): > pchar = unreserved / pct-encoded / sub-delims / ":" / "@" > query = *( pchar / "/" / "?" ) ... > pct-encoded = "%" HEXDIG HEXDIG > unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" ... > sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" > / "*" / "+" / "," / ";" / "=" --- src/lib.rs | 17 +++++++++++++++-- src/ser.rs | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5867071..871d543 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -432,11 +432,24 @@ mod tests { #[test] fn label_with_rfc3986_param_separator() { - let input = "bitcoin:1andreas3batLhQa2FawWjeyjCqyBzypd?label=foo%26bar%20%3D%20baz%3F"; + let input = "bitcoin:1andreas3batLhQa2FawWjeyjCqyBzypd?label=foo%26bar%20%3D%20baz/blah?;:@"; let uri = input.parse::>().unwrap().require_network(bitcoin::Network::Bitcoin).unwrap(); let label: Cow<'_, str> = uri.label.clone().unwrap().try_into().unwrap(); assert_eq!(uri.address.to_string(), "1andreas3batLhQa2FawWjeyjCqyBzypd"); - assert_eq!(label, "foo&bar = baz?"); + assert_eq!(label, "foo&bar = baz/blah?;:@"); + assert!(uri.amount.is_none()); + assert!(uri.message.is_none()); + + assert_eq!(uri.to_string(), input); + } + + #[test] + fn label_with_rfc3986_fragment_separator() { + let input = "bitcoin:1andreas3batLhQa2FawWjeyjCqyBzypd?label=foo%23bar"; + let uri = input.parse::>().unwrap().require_network(bitcoin::Network::Bitcoin).unwrap(); + let label: Cow<'_, str> = uri.label.clone().unwrap().try_into().unwrap(); + assert_eq!(uri.address.to_string(), "1andreas3batLhQa2FawWjeyjCqyBzypd"); + assert_eq!(label, "foo#bar"); assert!(uri.amount.is_none()); assert!(uri.message.is_none()); diff --git a/src/ser.rs b/src/ser.rs index 997a511..678b669 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -47,7 +47,61 @@ impl<'a, W: fmt::Write> fmt::Write for EqSignChecker<'a, W> { } /// Set of characters that will be percent-encoded -const ASCII_SET: percent_encoding_rfc3986::AsciiSet = percent_encoding_rfc3986::CONTROLS.add(b'&').add(b'?').add(b' ').add(b'='); +/// +/// This contains anything not in `query` (i.e. 
`gen-delims` from the quoted +/// definitions) as per RFC 3986, as well as '&' and '=' as per BIP 21. +/// +/// [BIP 21](https://github.com/bitcoin/bips/blob/master/bip-0021.mediawiki#abnf-grammar): +/// +/// > ```text +/// > labelparam = "label=" *qchar +/// > messageparam = "message=" *qchar +/// > otherparam = qchar *qchar [ "=" *qchar ] +/// > ``` +/// ... +/// > Here, "qchar" corresponds to valid characters of an RFC 3986 URI query +/// > component, excluding the "=" and "&" characters, which this BIP takes as +/// > separators. +/// +/// [RFC 3986 Appendix A](https://www.rfc-editor.org/rfc/rfc3986#appendix-A): +/// +/// > ```text +/// > pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +/// > query = *( pchar / "/" / "?" ) +/// > ``` +/// ... +/// > ```text +/// > pct-encoded = "%" HEXDIG HEXDIG +/// > unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +/// > ``` +/// ... +/// > ```text +/// > sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +/// > / "*" / "+" / "," / ";" / "=" +/// > ``` +const ASCII_SET: percent_encoding_rfc3986::AsciiSet = percent_encoding_rfc3986::NON_ALPHANUMERIC + // allow non-alphanumeric characters from `unreserved` + .remove(b'-') + .remove(b'.') + .remove(b'_') + .remove(b'~') + // allow non-alphanumeric characters from `sub-delims` excluding bip-21 + // separators ("&" and "=") + .remove(b'!') + .remove(b'$') + .remove(b'\'') + .remove(b'(') + .remove(b')') + .remove(b'*') + .remove(b'+') + .remove(b',') + .remove(b';') + // allow pchar extra chars + .remove(b':') + .remove(b'@') + // allow query extra chars + .remove(b'/') + .remove(b'?'); /// Percent-encodes writes. struct WriterEncoder(W);