From b647d5ff8f23e619ed822d4928c0c6f54d5a90e4 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Fri, 4 Aug 2023 11:16:28 -0700
Subject: [PATCH 1/5] add tests and code to skip images in comments

This will address #130
---
 R/get_images.R                   |  4 ++-
 tests/testthat/test-get_images.R | 52 ++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/R/get_images.R b/R/get_images.R
index 3d0db70b..c150193d 100644
--- a/R/get_images.R
+++ b/R/get_images.R
@@ -9,8 +9,10 @@
 get_images <- function(yrn, process = TRUE) {
   img    <- ".//md:image"
   hblock <- ".//md:html_block[contains(text(), '<img')]"
+  # comment blocks should not be included in the image processing. 
+  nocomment <- "[not(starts-with(normalize-space(text()), '<!--'))]"
   hline  <- ".//md:html_inline[starts-with(text(), '<img')]"
-  xpath  <- glue::glue("{img} | {hblock} | {hline}")
+  xpath  <- glue::glue("{img} | {hblock}{nocomment} | {hline}")
   images <- xml2::xml_find_all(yrn$body, xpath, yrn$ns)
   images <- if (process) process_images(images) else images
   images
diff --git a/tests/testthat/test-get_images.R b/tests/testthat/test-get_images.R
index 370258b8..9183dddd 100644
--- a/tests/testthat/test-get_images.R
+++ b/tests/testthat/test-get_images.R
@@ -29,3 +29,55 @@ test_that("markdown images will process alt text appropriately", {
   expect_equal(xml2::xml_attr(images, "alt"), expected_alts)
 })
 
+
+test_that("HTML images in comments do not error", {
+  # see https://github.com/carpentries/pegboard/issues/130
+
+  tmp <- withr::local_tempfile()
+
+  all_comment <- r"{
+  some text and 
+
+  <!-- <img src='whoops.png'> -->
+  }"
+  writeLines(all_comment, tmp)
+  ep <- pegboard::Episode$new(tmp)
+  res <- ep$validate_links(warn = FALSE)
+  expect_null(res)
+})
+
+
+test_that("HTML images surrounded by comments are processed. ", {
+
+  tmp <- withr::local_tempfile()
+
+  some_comment<- r"{
+There is only one HTML image here and it will be processed.
+NOTE: each comment block is parsed as an individual HTML block,
+and the numbers in this example help us understand that fact.
+
+<!-- <img src='whoops.png'> ONE -->
+<!-- <img src='whoops.png'> 
+       <img src='whoops.png'> 
+   TWO
+   -->
+   <!-- <img src='whoops.png'> THREE -->
+  <img src='okay.png'> <!-- <img src='whoops.png'> -->
+
+  <!-- <img src='whoops.png'> FOUR -->
+  }"
+  writeLines(some_comment, tmp)
+  ep <- pegboard::Episode$new(tmp)
+  res <- ep$validate_links(warn = FALSE)
+  expect_equal(nrow(res), 1L)
+  # There are four HTML blocks 
+  blocks <- xml2::xml_find_all(ep$body, ".//md:html_block", ns = ep$ns)
+  expect_length(blocks, 4L)
+  # when we extract the number elements, they are what we expect.
+  expect_equal(gsub("[^A-Z]", "", xml2::xml_text(blocks)), 
+    c("ONE", "TWO", "THREE", "FOUR"))
+
+})
+
+
+

From e9fbf51b3e8176be9e3d44b36ada02317e7e5368 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Fri, 4 Aug 2023 11:18:53 -0700
Subject: [PATCH 2/5] make this work for R 3

---
 tests/testthat/test-get_images.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test-get_images.R b/tests/testthat/test-get_images.R
index 9183dddd..c3a168f1 100644
--- a/tests/testthat/test-get_images.R
+++ b/tests/testthat/test-get_images.R
@@ -35,11 +35,11 @@ test_that("HTML images in comments do not error", {
 
   tmp <- withr::local_tempfile()
 
-  all_comment <- r"{
+  all_comment <- "
   some text and 
 
   <!-- <img src='whoops.png'> -->
-  }"
+  "
   writeLines(all_comment, tmp)
   ep <- pegboard::Episode$new(tmp)
   res <- ep$validate_links(warn = FALSE)
@@ -51,7 +51,7 @@ test_that("HTML images surrounded by comments are processed. ", {
 
   tmp <- withr::local_tempfile()
 
-  some_comment<- r"{
+  some_comment<- "
 There is only one HTML image here and it will be processed.
 NOTE: each comment block is parsed as an individual HTML block,
 and the numbers in this example help us understand that fact.
@@ -65,7 +65,7 @@ and the numbers in this example help us understand that fact.
   <img src='okay.png'> <!-- <img src='whoops.png'> -->
 
   <!-- <img src='whoops.png'> FOUR -->
-  }"
+  "
   writeLines(some_comment, tmp)
   ep <- pegboard::Episode$new(tmp)
   res <- ep$validate_links(warn = FALSE)

From b57d83f71ddef724ec23122818f081cb1cb3534a Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Fri, 4 Aug 2023 11:31:41 -0700
Subject: [PATCH 3/5] update documentation for get_images()

---
 R/get_images.R          | 26 ++++++++++++++++++++++++++
 man/get_images.Rd       | 28 ++++++++++++++++++++++++++++
 man/pegboard-package.Rd |  1 +
 3 files changed, 55 insertions(+)

diff --git a/R/get_images.R b/R/get_images.R
index c150193d..be38816b 100644
--- a/R/get_images.R
+++ b/R/get_images.R
@@ -5,7 +5,33 @@
 #'   [process_images()] to add the alt attribute and extract images from HTML
 #'   blocks. `FALSE` will present the nodes as found by XPath search.
 #' @return an xml_nodelist
+#' @details Markdown users can write images as either markdown or HTML. If they
+#' write images as HTML, then the commonmark XML parser recognises these as
+#' generic "HTML blocks" and they can't be found by just searching for
+#' `.//md:image`. This function searches both `md:html_block` and
+#' `md:html_inline` for image content that it can extract for downstream
+#' analysis.
+#'
 #' @keywords internal
+#' @examples
+#' tmp <- tempfile()
+#' on.exit(unlink(tmp))
+#' txt <- '
+#' ![a kitten](https://placekitten.com/200/200){alt="a pretty kitten"}
+#'
+#' <!-- an html image of a kitten -->
+#' <img src="https://placekitten.com/200/200">
+#'
+#' an inline html image of a kitten <img src="https://placekitten.com/50/50">
+#' '
+#' writeLines(txt, tmp)
+#' ep <- Episode$new(tmp)
+#' ep$show()
+#' # without process = TRUE, images in HTML elements are not converted
+#' ep$get_images() 
+#' # setting process = TRUE will extract the HTML elements for analysis 
+#' # (e.g to detect alt text)
+#' ep$get_images(process = TRUE)
 get_images <- function(yrn, process = TRUE) {
   img    <- ".//md:image"
   hblock <- ".//md:html_block[contains(text(), '<img')]"
diff --git a/man/get_images.Rd b/man/get_images.Rd
index 5ecbb528..76022aa9 100644
--- a/man/get_images.Rd
+++ b/man/get_images.Rd
@@ -19,4 +19,32 @@ an xml_nodelist
 \description{
 Get images from an Episode/yarn object
 }
+\details{
+Markdown users can write images as either markdown or HTML. If they
+write images as HTML, then the commonmark XML parser recognises these as
+generic "HTML blocks" and they can't be found by just searching for
+\verb{.//md:image}. This function searches both \code{md:html_block} and
+\code{md:html_inline} for image content that it can extract for downstream
+analysis.
+}
+\examples{
+tmp <- tempfile()
+on.exit(unlink(tmp))
+txt <- '
+![a kitten](https://placekitten.com/200/200){alt="a pretty kitten"}
+
+<!-- an html image of a kitten -->
+<img src="https://placekitten.com/200/200">
+
+an inline html image of a kitten <img src="https://placekitten.com/50/50">
+'
+writeLines(txt, tmp)
+ep <- Episode$new(tmp)
+ep$show()
+# without process = TRUE, images in HTML elements are not converted
+ep$get_images() 
+# setting process = TRUE will extract the HTML elements for analysis 
+# (e.g to detect alt text)
+ep$get_images(process = TRUE)
+}
 \keyword{internal}
diff --git a/man/pegboard-package.Rd b/man/pegboard-package.Rd
index 043bdbc9..b44046a6 100644
--- a/man/pegboard-package.Rd
+++ b/man/pegboard-package.Rd
@@ -24,6 +24,7 @@ Useful links:
 Other contributors:
 \itemize{
   \item Toby Hodges \email{tobyhodges@carpentries.org} [contributor]
+  \item Erin Becker \email{ebecker@carpentries.org} [contributor]
 }
 
 }

From e685501646de06bc3ebf085f20ce8054d6c89e70 Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Fri, 4 Aug 2023 11:36:22 -0700
Subject: [PATCH 4/5] add news item

---
 NEWS.md | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index e2bdb84b..bf95fb60 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,19 +1,22 @@
-# pegboard 0.5.3 (unreleased)
+# pegboard 0.5.3 (2023-07-08)
 
 BUG FIX
 -------
 
-* `$move_objectives()` and `$move_questions()` methods no longer place these
-  blocks as the _second_ element in the markdown. This was originally
-  implemented when we thought {dovetail} would be our solution to the lesson 
-  infrastructure (and thus needed a setup chunk before any blocks).
-* liquid-formatted links with markdown inside them are now parsed correctly.
-  This leads lessons to be more accurately transitioned to use {sandpaper}
-  (reported: @uschille,
+* `$validate_links()` no longer throws an error when there are HTML images
+  embedded in comments (reported: @beastyblacksmith, #130; fixed: @zkamvar,
+  #131)
+* (transition) `$move_objectives()` and `$move_questions()` methods no longer
+  place these blocks as the _second_ element in the markdown. This was
+  originally implemented when we thought {dovetail} would be our solution to
+  the lesson infrastructure (and thus needed a setup chunk before any blocks).
+* (transition) liquid-formatted links with markdown inside them are now parsed
+  correctly. This leads lessons to be more accurately transitioned to use
+  {sandpaper} (reported: @uschille,
   https://github.com/carpentries/lesson-transition/issues/46; fixed: @zkamvar,
   #121)
-- images with kramdown attributes that are on a new line are now more accurately
-  transitioned to use {sandpaper} (reported: @uschille,
+- (transition) images with kramdown attributes that are on a new line are now
+  more accurately transitioned to use {sandpaper} (reported: @uschille,
   https://github.com/carpentries/lesson-transition/issues/46; fixed: @zkamvar,
   #121)
 

From 8b1da0fef986bc05931b44f06f8a3671d45a43cc Mon Sep 17 00:00:00 2001
From: "Zhian N. Kamvar" <zkamvar@gmail.com>
Date: Thu, 10 Aug 2023 08:46:02 -0700
Subject: [PATCH 5/5] Update tests/testthat/test-get_images.R

---
 tests/testthat/test-get_images.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-get_images.R b/tests/testthat/test-get_images.R
index c3a168f1..1a4c4dd0 100644
--- a/tests/testthat/test-get_images.R
+++ b/tests/testthat/test-get_images.R
@@ -51,7 +51,7 @@ test_that("HTML images surrounded by comments are processed. ", {
 
   tmp <- withr::local_tempfile()
 
-  some_comment<- "
+  some_comment <- "
 There is only one HTML image here and it will be processed.
 NOTE: each comment block is parsed as an individual HTML block,
 and the numbers in this example help us understand that fact.