diff --git a/NEWS.md b/NEWS.md
index e2bdb84b..bf95fb60 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,19 +1,22 @@
-# pegboard 0.5.3 (unreleased)
+# pegboard 0.5.3 (2023-07-08)
BUG FIX
-------
-* `$move_objectives()` and `$move_questions()` methods no longer place these
- blocks as the _second_ element in the markdown. This was originally
- implemented when we thought {dovetail} would be our solution to the lesson
- infrastructure (and thus needed a setup chunk before any blocks).
-* liquid-formatted links with markdown inside them are now parsed correctly.
- This leads lessons to be more accurately transitioned to use {sandpaper}
- (reported: @uschille,
+* `$validate_links()` no longer throws an error when there are HTML images
+ embedded in comments (reported: @beastyblacksmith, #130; fixed: @zkamvar,
+ #131)
+* (transition) `$move_objectives()` and `$move_questions()` methods no longer
+ place these blocks as the _second_ element in the markdown. This was
+ originally implemented when we thought {dovetail} would be our solution to
+ the lesson infrastructure (and thus needed a setup chunk before any blocks).
+* (transition) liquid-formatted links with markdown inside them are now parsed
+ correctly. This leads lessons to be more accurately transitioned to use
+ {sandpaper} (reported: @uschille,
https://github.com/carpentries/lesson-transition/issues/46; fixed: @zkamvar,
#121)
-- images with kramdown attributes that are on a new line are now more accurately
- transitioned to use {sandpaper} (reported: @uschille,
+- (transition) images with kramdown attributes that are on a new line are now
+ more accurately transitioned to use {sandpaper} (reported: @uschille,
https://github.com/carpentries/lesson-transition/issues/46; fixed: @zkamvar,
#121)
diff --git a/R/get_images.R b/R/get_images.R
index 3d0db70b..be38816b 100644
--- a/R/get_images.R
+++ b/R/get_images.R
@@ -5,12 +5,40 @@
#' [process_images()] to add the alt attribute and extract images from HTML
#' blocks. `FALSE` will present the nodes as found by XPath search.
#' @return an xml_nodelist
+#' @details Markdown users can write images as either markdown or HTML. If they
+#' write images as HTML, then the commonmark XML parser recognises these as
+#' generic "HTML blocks" and they can't be found by just searching for
+#' `.//md:image`. This function searches both `md:html_block` and
+#' `md:html_inline` for image content that it can extract for downstream
+#' analysis.
+#'
#' @keywords internal
+#' @examples
+#' tmp <- tempfile()
+#' on.exit(unlink(tmp))
+#' txt <- '
+#' ![a kitten](https://placekitten.com/200/200){alt="a pretty kitten"}
+#'
+#'
+#'
+#'
+#' an inline html image of a kitten
+#' '
+#' writeLines(txt, tmp)
+#' ep <- Episode$new(tmp)
+#' ep$show()
+#' # without process = TRUE, images in HTML elements are not converted
+#' ep$get_images()
+#' # setting process = TRUE will extract the HTML elements for analysis
+#' # (e.g to detect alt text)
+#' ep$get_images(process = TRUE)
get_images <- function(yrn, process = TRUE) {
img <- ".//md:image"
hblock <- ".//md:html_block[contains(text(), '
+
+
+an inline html image of a kitten
+'
+writeLines(txt, tmp)
+ep <- Episode$new(tmp)
+ep$show()
+# without process = TRUE, images in HTML elements are not converted
+ep$get_images()
+# setting process = TRUE will extract the HTML elements for analysis
+# (e.g to detect alt text)
+ep$get_images(process = TRUE)
+}
\keyword{internal}
diff --git a/man/pegboard-package.Rd b/man/pegboard-package.Rd
index 043bdbc9..b44046a6 100644
--- a/man/pegboard-package.Rd
+++ b/man/pegboard-package.Rd
@@ -24,6 +24,7 @@ Useful links:
Other contributors:
\itemize{
\item Toby Hodges \email{tobyhodges@carpentries.org} [contributor]
+ \item Erin Becker \email{ebecker@carpentries.org} [contributor]
}
}
diff --git a/tests/testthat/test-get_images.R b/tests/testthat/test-get_images.R
index 370258b8..1a4c4dd0 100644
--- a/tests/testthat/test-get_images.R
+++ b/tests/testthat/test-get_images.R
@@ -29,3 +29,55 @@ test_that("markdown images will process alt text appropriately", {
expect_equal(xml2::xml_attr(images, "alt"), expected_alts)
})
+
+test_that("HTML images in comments do not error", {
+ # see https://github.com/carpentries/pegboard/issues/130
+
+ tmp <- withr::local_tempfile()
+
+ all_comment <- "
+ some text and
+
+
+ "
+ writeLines(all_comment, tmp)
+ ep <- pegboard::Episode$new(tmp)
+ res <- ep$validate_links(warn = FALSE)
+ expect_null(res)
+})
+
+
+test_that("HTML images surrounded by comments are processed. ", {
+
+ tmp <- withr::local_tempfile()
+
+ some_comment <- "
+There is only one HTML image here and it will be processed.
+NOTE: each comment block is parsed as an individual HTML block,
+and the numbers in this example help us understand that fact.
+
+
+
+
+
+
+
+ "
+ writeLines(some_comment, tmp)
+ ep <- pegboard::Episode$new(tmp)
+ res <- ep$validate_links(warn = FALSE)
+ expect_equal(nrow(res), 1L)
+ # There are four HTML blocks
+ blocks <- xml2::xml_find_all(ep$body, ".//md:html_block", ns = ep$ns)
+ expect_length(blocks, 4L)
+ # when we extract the number elements, they are what we expect.
+ expect_equal(gsub("[^A-Z]", "", xml2::xml_text(blocks)),
+ c("ONE", "TWO", "THREE", "FOUR"))
+
+})
+
+
+