From b647d5ff8f23e619ed822d4928c0c6f54d5a90e4 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Fri, 4 Aug 2023 11:16:28 -0700 Subject: [PATCH 1/5] add tests and code to skip images in comments This will address #130 --- R/get_images.R | 4 ++- tests/testthat/test-get_images.R | 52 ++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/R/get_images.R b/R/get_images.R index 3d0db70b..c150193d 100644 --- a/R/get_images.R +++ b/R/get_images.R @@ -9,8 +9,10 @@ get_images <- function(yrn, process = TRUE) { img <- ".//md:image" hblock <- ".//md:html_block[contains(text(), ' --> + }" + writeLines(all_comment, tmp) + ep <- pegboard::Episode$new(tmp) + res <- ep$validate_links(warn = FALSE) + expect_null(res) +}) + + +test_that("HTML images surrounded by comments are processed. ", { + + tmp <- withr::local_tempfile() + + some_comment<- r"{ +There is only one HTML image here and it will be processed. +NOTE: each comment block is parsed as an individual HTML block, +and the numbers in this example help us understand that fact. + + + + + + + + }" + writeLines(some_comment, tmp) + ep <- pegboard::Episode$new(tmp) + res <- ep$validate_links(warn = FALSE) + expect_equal(nrow(res), 1L) + # There are four HTML blocks + blocks <- xml2::xml_find_all(ep$body, ".//md:html_block", ns = ep$ns) + expect_length(blocks, 4L) + # when we extract the number elements, they are what we expect. + expect_equal(gsub("[^A-Z]", "", xml2::xml_text(blocks)), + c("ONE", "TWO", "THREE", "FOUR")) + +}) + + + From e9fbf51b3e8176be9e3d44b36ada02317e7e5368 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Fri, 4 Aug 2023 11:18:53 -0700 Subject: [PATCH 2/5] make this work for R 3 --- tests/testthat/test-get_images.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-get_images.R b/tests/testthat/test-get_images.R index 9183dddd..c3a168f1 100644 --- a/tests/testthat/test-get_images.R +++ b/tests/testthat/test-get_images.R @@ -35,11 +35,11 @@ test_that("HTML images in comments do not error", { tmp <- withr::local_tempfile() - all_comment <- r"{ + all_comment <- " some text and - }" + " writeLines(all_comment, tmp) ep <- pegboard::Episode$new(tmp) res <- ep$validate_links(warn = FALSE) @@ -51,7 +51,7 @@ test_that("HTML images surrounded by comments are processed. ", { tmp <- withr::local_tempfile() - some_comment<- r"{ + some_comment<- " There is only one HTML image here and it will be processed. NOTE: each comment block is parsed as an individual HTML block, and the numbers in this example help us understand that fact. @@ -65,7 +65,7 @@ and the numbers in this example help us understand that fact. - }" + " writeLines(some_comment, tmp) ep <- pegboard::Episode$new(tmp) res <- ep$validate_links(warn = FALSE) From b57d83f71ddef724ec23122818f081cb1cb3534a Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Fri, 4 Aug 2023 11:31:41 -0700 Subject: [PATCH 3/5] update documentation for get_images() --- R/get_images.R | 26 ++++++++++++++++++++++++++ man/get_images.Rd | 28 ++++++++++++++++++++++++++++ man/pegboard-package.Rd | 1 + 3 files changed, 55 insertions(+) diff --git a/R/get_images.R b/R/get_images.R index c150193d..be38816b 100644 --- a/R/get_images.R +++ b/R/get_images.R @@ -5,7 +5,33 @@ #' [process_images()] to add the alt attribute and extract images from HTML #' blocks. `FALSE` will present the nodes as found by XPath search. #' @return an xml_nodelist +#' @details Markdown users can write images as either markdown or HTML. If they +#' write images as HTML, then the commonmark XML parser recognises these as +#' generic "HTML blocks" and they can't be found by just searching for +#' `.//md:image`. This function searches both `md:html_block` and +#' `md:html_inline` for image content that it can extract for downstream +#' analysis. +#' #' @keywords internal +#' @examples +#' tmp <- tempfile() +#' on.exit(unlink(tmp)) +#' txt <- ' +#' ![a kitten](https://placekitten.com/200/200){alt="a pretty kitten"} +#' +#' +#' +#' +#' an inline html image of a kitten +#' ' +#' writeLines(txt, tmp) +#' ep <- Episode$new(tmp) +#' ep$show() +#' # without process = TRUE, images in HTML elements are not converted +#' ep$get_images() +#' # setting process = TRUE will extract the HTML elements for analysis +#' # (e.g to detect alt text) +#' ep$get_images(process = TRUE) get_images <- function(yrn, process = TRUE) { img <- ".//md:image" hblock <- ".//md:html_block[contains(text(), ' + + +an inline html image of a kitten +' +writeLines(txt, tmp) +ep <- Episode$new(tmp) +ep$show() +# without process = TRUE, images in HTML elements are not converted +ep$get_images() +# setting process = TRUE will extract the HTML elements for analysis +# (e.g to detect alt text) +ep$get_images(process = TRUE) +} \keyword{internal} diff --git a/man/pegboard-package.Rd b/man/pegboard-package.Rd index 043bdbc9..b44046a6 100644 --- a/man/pegboard-package.Rd +++ b/man/pegboard-package.Rd @@ -24,6 +24,7 @@ Useful links: Other contributors: \itemize{ \item Toby Hodges \email{tobyhodges@carpentries.org} [contributor] + \item Erin Becker \email{ebecker@carpentries.org} [contributor] } } From e685501646de06bc3ebf085f20ce8054d6c89e70 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Fri, 4 Aug 2023 11:36:22 -0700 Subject: [PATCH 4/5] add news item --- NEWS.md | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/NEWS.md b/NEWS.md index e2bdb84b..bf95fb60 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,19 +1,22 @@ -# pegboard 0.5.3 (unreleased) +# pegboard 0.5.3 (2023-07-08) BUG FIX ------- -* `$move_objectives()` and `$move_questions()` methods no longer place these - blocks as the _second_ element in the markdown. This was originally - implemented when we thought {dovetail} would be our solution to the lesson - infrastructure (and thus needed a setup chunk before any blocks). -* liquid-formatted links with markdown inside them are now parsed correctly. - This leads lessons to be more accurately transitioned to use {sandpaper} - (reported: @uschille, +* `$validate_links()` no longer throws an error when there are HTML images + embedded in comments (reported: @beastyblacksmith, #130; fixed: @zkamvar, + #131) +* (transition) `$move_objectives()` and `$move_questions()` methods no longer + place these blocks as the _second_ element in the markdown. This was + originally implemented when we thought {dovetail} would be our solution to + the lesson infrastructure (and thus needed a setup chunk before any blocks). +* (transition) liquid-formatted links with markdown inside them are now parsed + correctly. This leads lessons to be more accurately transitioned to use + {sandpaper} (reported: @uschille, https://github.com/carpentries/lesson-transition/issues/46; fixed: @zkamvar, #121) -- images with kramdown attributes that are on a new line are now more accurately - transitioned to use {sandpaper} (reported: @uschille, +- (transition) images with kramdown attributes that are on a new line are now + more accurately transitioned to use {sandpaper} (reported: @uschille, https://github.com/carpentries/lesson-transition/issues/46; fixed: @zkamvar, #121) From 8b1da0fef986bc05931b44f06f8a3671d45a43cc Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 10 Aug 2023 08:46:02 -0700 Subject: [PATCH 5/5] Update tests/testthat/test-get_images.R --- tests/testthat/test-get_images.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-get_images.R b/tests/testthat/test-get_images.R index c3a168f1..1a4c4dd0 100644 --- a/tests/testthat/test-get_images.R +++ b/tests/testthat/test-get_images.R @@ -51,7 +51,7 @@ test_that("HTML images surrounded by comments are processed. ", { tmp <- withr::local_tempfile() - some_comment<- " + some_comment <- " There is only one HTML image here and it will be processed. NOTE: each comment block is parsed as an individual HTML block, and the numbers in this example help us understand that fact.