From c95983ac1f779fbc0bfb8bd943c89b9d964fb3b8 Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Thu, 30 Apr 2020 11:17:38 +1000 Subject: [PATCH] preserve entities when stripping HTML for MathJax https://anki.tenderapp.com/discussions/ankidesktop/40987-how-to-render-angled-brackets --- rslib/src/cloze.rs | 15 +++++++++++++-- rslib/src/text.rs | 6 +++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/rslib/src/cloze.rs b/rslib/src/cloze.rs index 244e0ee63c7..42569068087 100644 --- a/rslib/src/cloze.rs +++ b/rslib/src/cloze.rs @@ -3,7 +3,7 @@ use crate::latex::contains_latex; use crate::template::RenderContext; -use crate::text::strip_html; +use crate::text::strip_html_preserving_entities; use lazy_static::lazy_static; use regex::Captures; use regex::Regex; @@ -124,7 +124,8 @@ fn strip_html_inside_mathjax(text: &str) -> Cow<str> { format!( "{}{}{}", caps.get(mathjax_caps::OPENING_TAG).unwrap().as_str(), - strip_html(caps.get(mathjax_caps::INNER_TEXT).unwrap().as_str()).as_ref(), + strip_html_preserving_entities(caps.get(mathjax_caps::INNER_TEXT).unwrap().as_str()) + .as_ref(), caps.get(mathjax_caps::CLOSING_TAG).unwrap().as_str() ) }) @@ -140,6 +141,7 @@ pub(crate) fn cloze_filter<'a>(text: &'a str, context: &RenderContext) -> Cow<'a #[cfg(test)] mod test { + use super::strip_html_inside_mathjax; use crate::cloze::{cloze_numbers_in_string, expand_clozes_to_reveal_latex}; use crate::text::strip_html; use std::collections::HashSet; @@ -166,4 +168,13 @@ mod test { assert!(expanded.contains("[...] bar")); assert!(expanded.contains("foo bar")); } + + #[test] + fn mathjax_html() { + // escaped angle brackets should be preserved + assert_eq!( + strip_html_inside_mathjax(r"\(&lt;&gt;\)"), + r"\(&lt;&gt;\)" + ); + } } diff --git a/rslib/src/text.rs b/rslib/src/text.rs index a88bd4a4f87..f75aed14e38 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -77,7 +77,7 @@ lazy_static!
{ pub fn strip_html(html: &str) -> Cow<str> { let mut out: Cow<str> = html.into(); - if let Cow::Owned(o) = HTML.replace_all(html, "") { + if let Cow::Owned(o) = strip_html_preserving_entities(html) { out = o.into(); } @@ -88,6 +88,10 @@ pub fn strip_html(html: &str) -> Cow<str> { out } +pub fn strip_html_preserving_entities(html: &str) -> Cow<str> { + HTML.replace_all(html, "") +} + pub fn decode_entities(html: &str) -> Cow<str> { if html.contains('&') { match htmlescape::decode_html(html) {