diff --git a/NEWS.md b/NEWS.md index e2bdb84b..bf95fb60 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,19 +1,22 @@ -# pegboard 0.5.3 (unreleased) +# pegboard 0.5.3 (2023-07-08) BUG FIX ------- -* `$move_objectives()` and `$move_questions()` methods no longer place these - blocks as the _second_ element in the markdown. This was originally - implemented when we thought {dovetail} would be our solution to the lesson - infrastructure (and thus needed a setup chunk before any blocks). -* liquid-formatted links with markdown inside them are now parsed correctly. - This leads lessons to be more accurately transitioned to use {sandpaper} - (reported: @uschille, +* `$validate_links()` no longer throws an error when there are HTML images + embedded in comments (reported: @beastyblacksmith, #130; fixed: @zkamvar, + #131) +* (transition) `$move_objectives()` and `$move_questions()` methods no longer + place these blocks as the _second_ element in the markdown. This was + originally implemented when we thought {dovetail} would be our solution to + the lesson infrastructure (and thus needed a setup chunk before any blocks). +* (transition) liquid-formatted links with markdown inside them are now parsed + correctly. This leads lessons to be more accurately transitioned to use + {sandpaper} (reported: @uschille, https://github.com/carpentries/lesson-transition/issues/46; fixed: @zkamvar, #121) -- images with kramdown attributes that are on a new line are now more accurately - transitioned to use {sandpaper} (reported: @uschille, +- (transition) images with kramdown attributes that are on a new line are now + more accurately transitioned to use {sandpaper} (reported: @uschille, https://github.com/carpentries/lesson-transition/issues/46; fixed: @zkamvar, #121) diff --git a/R/get_images.R b/R/get_images.R index 3d0db70b..be38816b 100644 --- a/R/get_images.R +++ b/R/get_images.R @@ -5,12 +5,40 @@ #' [process_images()] to add the alt attribute and extract images from HTML #' blocks. `FALSE` will present the nodes as found by XPath search. #' @return an xml_nodelist +#' @details Markdown users can write images as either markdown or HTML. If they +#' write images as HTML, then the commonmark XML parser recognises these as +#' generic "HTML blocks" and they can't be found by just searching for +#' `.//md:image`. This function searches both `md:html_block` and +#' `md:html_inline` for image content that it can extract for downstream +#' analysis. +#' #' @keywords internal +#' @examples +#' tmp <- tempfile() +#' on.exit(unlink(tmp)) +#' txt <- ' +#' ![a kitten](https://placekitten.com/200/200){alt="a pretty kitten"} +#' +#' +#' +#' +#' an inline html image of a kitten +#' ' +#' writeLines(txt, tmp) +#' ep <- Episode$new(tmp) +#' ep$show() +#' # without process = TRUE, images in HTML elements are not converted +#' ep$get_images() +#' # setting process = TRUE will extract the HTML elements for analysis +#' # (e.g to detect alt text) +#' ep$get_images(process = TRUE) get_images <- function(yrn, process = TRUE) { img <- ".//md:image" hblock <- ".//md:html_block[contains(text(), ' + + +an inline html image of a kitten +' +writeLines(txt, tmp) +ep <- Episode$new(tmp) +ep$show() +# without process = TRUE, images in HTML elements are not converted +ep$get_images() +# setting process = TRUE will extract the HTML elements for analysis +# (e.g to detect alt text) +ep$get_images(process = TRUE) +} \keyword{internal} diff --git a/man/pegboard-package.Rd b/man/pegboard-package.Rd index 043bdbc9..b44046a6 100644 --- a/man/pegboard-package.Rd +++ b/man/pegboard-package.Rd @@ -24,6 +24,7 @@ Useful links: Other contributors: \itemize{ \item Toby Hodges \email{tobyhodges@carpentries.org} [contributor] + \item Erin Becker \email{ebecker@carpentries.org} [contributor] } } diff --git a/tests/testthat/test-get_images.R b/tests/testthat/test-get_images.R index 370258b8..1a4c4dd0 100644 --- a/tests/testthat/test-get_images.R +++ b/tests/testthat/test-get_images.R @@ -29,3 +29,55 @@ test_that("markdown images will process alt text appropriately", { expect_equal(xml2::xml_attr(images, "alt"), expected_alts) }) + +test_that("HTML images in comments do not error", { + # see https://github.com/carpentries/pegboard/issues/130 + + tmp <- withr::local_tempfile() + + all_comment <- " + some text and + + + " + writeLines(all_comment, tmp) + ep <- pegboard::Episode$new(tmp) + res <- ep$validate_links(warn = FALSE) + expect_null(res) +}) + + +test_that("HTML images surrounded by comments are processed. ", { + + tmp <- withr::local_tempfile() + + some_comment <- " +There is only one HTML image here and it will be processed. +NOTE: each comment block is parsed as an individual HTML block, +and the numbers in this example help us understand that fact. + + + + + + + + " + writeLines(some_comment, tmp) + ep <- pegboard::Episode$new(tmp) + res <- ep$validate_links(warn = FALSE) + expect_equal(nrow(res), 1L) + # There are four HTML blocks + blocks <- xml2::xml_find_all(ep$body, ".//md:html_block", ns = ep$ns) + expect_length(blocks, 4L) + # when we extract the number elements, they are what we expect. + expect_equal(gsub("[^A-Z]", "", xml2::xml_text(blocks)), + c("ONE", "TWO", "THREE", "FOUR")) + +}) + + +