Skip to content

Commit

Permalink
preserve entities when stripping HTML for MathJax
Browse files Browse the repository at this point in the history
  • Loading branch information
dae committed Apr 30, 2020
1 parent e27d015 commit c95983a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
15 changes: 13 additions & 2 deletions rslib/src/cloze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

use crate::latex::contains_latex;
use crate::template::RenderContext;
use crate::text::strip_html;
use crate::text::strip_html_preserving_entities;
use lazy_static::lazy_static;
use regex::Captures;
use regex::Regex;
Expand Down Expand Up @@ -124,7 +124,8 @@ fn strip_html_inside_mathjax(text: &str) -> Cow<str> {
format!(
"{}{}{}",
caps.get(mathjax_caps::OPENING_TAG).unwrap().as_str(),
strip_html(caps.get(mathjax_caps::INNER_TEXT).unwrap().as_str()).as_ref(),
strip_html_preserving_entities(caps.get(mathjax_caps::INNER_TEXT).unwrap().as_str())
.as_ref(),
caps.get(mathjax_caps::CLOSING_TAG).unwrap().as_str()
)
})
Expand All @@ -140,6 +141,7 @@ pub(crate) fn cloze_filter<'a>(text: &'a str, context: &RenderContext) -> Cow<'a

#[cfg(test)]
mod test {
use super::strip_html_inside_mathjax;
use crate::cloze::{cloze_numbers_in_string, expand_clozes_to_reveal_latex};
use crate::text::strip_html;
use std::collections::HashSet;
Expand All @@ -166,4 +168,13 @@ mod test {
assert!(expanded.contains("[...] bar"));
assert!(expanded.contains("foo bar"));
}

#[test]
fn mathjax_html() {
// Inside a `\( ... \)` MathJax span the `<foo>`/`</foo>` tags must be removed,
// while the escaped angle brackets (`&lt;`, `&gt;`) must be preserved as
// written rather than being decoded.
assert_eq!(
strip_html_inside_mathjax(r"\(<foo>&lt;&gt;</foo>\)"),
r"\(&lt;&gt;\)"
);
}
}
6 changes: 5 additions & 1 deletion rslib/src/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ lazy_static! {
pub fn strip_html(html: &str) -> Cow<str> {
let mut out: Cow<str> = html.into();

if let Cow::Owned(o) = HTML.replace_all(html, "") {
if let Cow::Owned(o) = strip_html_preserving_entities(html) {
out = o.into();
}

Expand All @@ -88,6 +88,10 @@ pub fn strip_html(html: &str) -> Cow<str> {
out
}

/// Removes every match of the `HTML` pattern from `html`, replacing each
/// match with the empty string.
///
/// No other processing is applied to the text here, so entities such as
/// `&lt;` are returned exactly as they were written.
/// NOTE(review): this assumes the `HTML` pattern (defined elsewhere in this
/// file) matches tags but not entities -- confirm against its definition.
pub fn strip_html_preserving_entities(html: &str) -> Cow<str> {
HTML.replace_all(html, "")
}

pub fn decode_entities(html: &str) -> Cow<str> {
if html.contains('&') {
match htmlescape::decode_html(html) {
Expand Down

0 comments on commit c95983a

Please sign in to comment.