replace {httr} with {curl} (#249)
jameslamb authored Feb 24, 2025
1 parent 900c997 commit 18df38e
Showing 11 changed files with 234 additions and 98 deletions.
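
At a glance, the migration swaps {httr}'s one-call request-and-retry helper for an explicitly configured {curl} handle plus an in-package retry loop. The sketch below is illustrative only and is not copied from this diff (the real implementation is in r-pkg/R/helperfuns.R further down); `url` and `body` stand in for a real endpoint and JSON payload.

# before: {httr} composed headers, body, and retries in a single call
response <- httr::RETRY(
    verb = "POST"
    , url = url
    , config = httr::add_headers("Content-Type" = "application/json")
    , body = body
)

# after: configure a {curl} handle explicitly; retries are handled inside the package
handle <- curl::new_handle()
curl::handle_setheaders(handle, "Content-Type" = "application/json")
curl::handle_setopt(handle, customrequest = "POST", copypostfields = body)
response <- curl::curl_fetch_memory(url = url, handle = handle)
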
2 changes: 1 addition & 1 deletion .ci/build-docs.sh
@@ -10,7 +10,7 @@ sudo tlmgr --verify-repo=none update --self
sudo tlmgr --verify-repo=none install inconsolata helvetic rsfs

# install dependencies
Rscript -e "install.packages(c('assertthat', 'data.table', 'futile.logger', 'httr', 'jsonlite', 'knitr', 'markdown', 'pkgdown', 'purrr', 'roxygen2', 'stringr'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())"
Rscript -e "install.packages(c('assertthat', 'curl', 'data.table', 'futile.logger', 'jsonlite', 'knitr', 'markdown', 'pkgdown', 'purrr', 'roxygen2', 'stringr'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())"

cp NEWS.md ./r-pkg/
cp README.md ./r-pkg/
2 changes: 1 addition & 1 deletion .ci/lint-r-code.R
@@ -74,7 +74,7 @@ LINTERS_TO_USE <- list(
, "spaces_left_parens" = lintr::spaces_left_parentheses_linter()
, "sprintf" = lintr::sprintf_linter()
, "string_boundary" = lintr::string_boundary_linter()
, "todo_comments" = lintr::todo_comment_linter(c("todo", "fixme", "to-do"))
#, "todo_comments" = lintr::todo_comment_linter(c("todo", "fixme", "to-do"))
, "trailing_blank" = lintr::trailing_blank_lines_linter()
, "trailing_white" = lintr::trailing_whitespace_linter()
, "true_false" = lintr::T_and_F_symbol_linter()
2 changes: 1 addition & 1 deletion .ci/setup.sh
@@ -19,5 +19,5 @@ sudo apt-get install \
tidy \
qpdf

Rscript -e "install.packages(c('covr', 'data.table', 'futile.logger', 'httr', 'jsonlite', 'knitr', 'lintr', 'markdown', 'purrr', 'stringr', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())"
Rscript -e "install.packages(c('covr', 'curl', 'data.table', 'futile.logger', 'jsonlite', 'knitr', 'lintr', 'markdown', 'purrr', 'stringr', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())"
cp test-data/* r-pkg/inst/testdata/
6 changes: 3 additions & 3 deletions r-pkg/DESCRIPTION
@@ -22,13 +22,13 @@ Description:
Depends:
R (>= 3.3.0)
Imports:
curl,
data.table,
futile.logger,
httr,
jsonlite,
purrr,
stringr,
utils
stats,
stringr
Suggests:
knitr,
markdown,
11 changes: 6 additions & 5 deletions r-pkg/NAMESPACE
@@ -6,6 +6,10 @@ export(es_search)
export(get_fields)
export(parse_date_time)
export(unpack_nested_data)
importFrom(curl,curl_fetch_memory)
importFrom(curl,handle_setheaders)
importFrom(curl,handle_setopt)
importFrom(curl,new_handle)
importFrom(data.table,":=")
importFrom(data.table,as.data.table)
importFrom(data.table,copy)
@@ -16,13 +20,10 @@ importFrom(data.table,setcolorder)
importFrom(data.table,setkeyv)
importFrom(data.table,setnames)
importFrom(data.table,uniqueN)
importFrom(futile.logger,flog.debug)
importFrom(futile.logger,flog.fatal)
importFrom(futile.logger,flog.info)
importFrom(futile.logger,flog.warn)
importFrom(httr,RETRY)
importFrom(httr,add_headers)
importFrom(httr,content)
importFrom(httr,stop_for_status)
importFrom(jsonlite,fromJSON)
importFrom(parallel,clusterMap)
importFrom(parallel,detectCores)
@@ -34,8 +35,8 @@ importFrom(purrr,map_if)
importFrom(purrr,map_int)
importFrom(purrr,map_lgl)
importFrom(purrr,simplify)
importFrom(stats,runif)
importFrom(stringr,str_extract)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_split)
importFrom(stringr,str_split_fixed)
importFrom(utils,read.table)
4 changes: 0 additions & 4 deletions r-pkg/R/es_search.R
@@ -540,7 +540,6 @@ es_search <- function(es_host
result <- .request(
verb = "POST"
, url = scroll_url
, headers = c("Content-Type" = "application/json") # nolint[non_portable_path]
, body = sprintf('{"scroll": "%s", "scroll_id": "%s"}', scroll, scroll_id)
)
return(result)
@@ -558,7 +557,6 @@ es_search <- function(es_host
result <- .request(
verb = "POST"
, url = scroll_url
, headers = c("Content-Type" = "application/json") # nolint[non_portable_path]
, body = scroll_id
)
return(result)
@@ -634,7 +632,6 @@ es_search <- function(es_host
result <- .request(
verb = "GET"
, url = es_host
, headers = c("Content-Type" = "application/json") # nolint[non_portable_path]
, body = NULL
)
.stop_for_status(result)
@@ -719,7 +716,6 @@ es_search <- function(es_host
result <- .request(
verb = "POST"
, url = reqURL
, headers = c("Content-Type" = "application/json") # nolint[non_portable_path]
, body = query_body
)
.stop_for_status(result)
58 changes: 9 additions & 49 deletions r-pkg/R/get_fields.R
@@ -90,7 +90,6 @@ get_fields <- function(es_host
res <- .request(
verb = "GET"
, url = sprintf("%s/_cat/indices?format=json", es_url)
, headers = character()
, body = NULL
)
indexDT <- data.table::as.data.table(
@@ -112,7 +111,6 @@ get_fields <- function(es_host
result <- .request(
verb = "GET"
, url = es_url
, headers = c("Content-Type" = "application/json") # nolint[non_portable_path]
, body = NULL
)
.stop_for_status(result)
@@ -221,6 +219,8 @@ get_fields <- function(es_host

# [title] Get a data.table containing names of indices and aliases
# [es_host] A string identifying an Elasticsearch host.
#' @importFrom data.table as.data.table
#' @importFrom jsonlite fromJSON
.get_aliases <- function(es_host) {

# construct the url to the alias endpoint
@@ -230,7 +230,6 @@ get_fields <- function(es_host
result <- .request(
verb = "GET"
, url = url
, headers = c("Content-Type" = "application/json") # nolint[non_portable_path]
, body = NULL
)
.stop_for_status(result)
@@ -243,52 +242,13 @@ get_fields <- function(es_host
# there are no aliases in this Elasticsearch cluster
return(invisible(NULL))
} else {
major_version <- .get_es_version(es_host)
process_alias <- switch(
major_version
, "1" = .process_legacy_alias
, "2" = .process_legacy_alias
, "5" = .process_new_alias
, "6" = .process_new_alias
, .process_new_alias
aliasDT <- data.table::as.data.table(
jsonlite::fromJSON(
resultContent
, simplifyDataFrame = TRUE
, flatten = TRUE
)
)
return(process_alias(alias_string = resultContent))
return(aliasDT[, .(alias, index)])
}
}


# [title] Process the string returned by the GET alias API into a data.table
# [description] Older version of Elasticsearch (pre-5.x) had a slightly different return
# format for aliases. This handles those
# [alias_string] A string returned by the alias API with index and alias name
#' @importFrom data.table as.data.table
#' @importFrom jsonlite fromJSON
.process_legacy_alias <- function(alias_string) {
aliasDT <- data.table::as.data.table(
jsonlite::fromJSON(
alias_string
, simplifyDataFrame = TRUE
, flatten = TRUE
)
)
return(aliasDT[, .(alias, index)])
}

# [title] Process the string returned by the GET alias API into a data.table
# [description] This only works for Elasticsearch 5 and up
# [alias_string] A string returned by the alias API with index and alias name
#' @importFrom data.table data.table
#' @importFrom utils read.table
.process_new_alias <- function(alias_string) {

# process the string provided by the /_cat/aliases API into a data.frame and then a data.table
aliasDT <- data.table::data.table(
utils::read.table(
text = alias_string
, stringsAsFactors = FALSE
)
)

# return only the first two columns
return(aliasDT[, .(alias = V1, index = V2)])
}
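
The rewritten .get_aliases() drops the per-version alias parsers and always feeds the response body through jsonlite::fromJSON(). A minimal sketch of that parsing step follows; the sample JSON is an assumption modeled on Elasticsearch's /_cat/aliases?format=json output (the exact endpoint URL is built in a part of the hunk not shown above), not data taken from this diff.

# hypothetical response body, shaped like /_cat/aliases?format=json output
resultContent <- '[{"alias": "logs-current", "index": "logs-2025.02", "filter": "-"}]'

aliasDT <- data.table::as.data.table(
    jsonlite::fromJSON(
        resultContent
        , simplifyDataFrame = TRUE
        , flatten = TRUE
    )
)

# keep only the two columns the rest of the package uses
aliasDT[, .(alias, index)]
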
148 changes: 133 additions & 15 deletions r-pkg/R/helperfuns.R
@@ -2,9 +2,20 @@
# [name] .content
# [description] Mainly here to making mocking easier in testing.
# [references] https://testthat.r-lib.org/reference/local_mocked_bindings.html#namespaced-calls
#' @importFrom httr content
#' @importFrom jsonlite fromJSON
.content <- function(response, as) {
return(httr::content(response, as = as))
text_content <- rawToChar(response$content)
if (as == "text") {
return(text_content)
}

# if not plain text, assume we want to parse JSON into an R list
return(jsonlite::fromJSON(
txt = text_content
, simplifyVector = FALSE
, simplifyDataFrame = FALSE
, simplifyMatrix = FALSE
))
}

# [title] Get a random length-n string
@@ -21,28 +32,135 @@
)
}

# [title] List out HTTP codes that should be treated as retryable
# [name] .should_retry
# [description] Here because {curl} doesn't ship a retry mechanism, so this library
# implements its own.
.should_retry <- function(response) {
retryable_error_codes <- c(
# 408 - timeout
408L
# 422 - unprocessable entity
, 422L
# 425 - too early
, 425L
# 429 - too many requests
, 429L
# 500 - internal server error
, 500L
# 502 - bad gateway
, 502L
# 503 - service unavailable
, 503L
# 504 - gateway timeout
, 504L
)
return(response$status_code %in% retryable_error_codes)
}

# [title] Retry an HTTP requests a couple times (if necessary)
# [name] .retry
# [description] Implements exponential backoff with jitter, around failed requests.
# See .should_retry() for details on which status codes are considered retryable.
# This is here because {curl} does not have a built-in retry API.
#' @importFrom curl curl_fetch_memory
#' @importFrom stats runif
.retry <- function(handle, url) {

max_retries <- 3L
attempt_count <- 1L
while (attempt_count <= max_retries) {

# if this isn't the 1st attempt, apply backoff
if (attempt_count > 1L) {
# exponential backoff with jitter
#
# 1.45s + {jitter}
# 2.10s + {jitter}
# 3.05s + {jitter}
# etc., etc.
#
# ref: https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
sleep_seconds <- 1.45 ** (attempt_count - 1L) + stats::runif(n = 1L, min = 0.1, max = 0.5)
.log_debug(sprintf("Sleeping for %.2f seconds before retrying.", sleep_seconds))
Sys.sleep(sleep_seconds)
}

# execute request
response <- curl::curl_fetch_memory(
url = url
, handle = handle
)

# check if the response should be retried
if (.should_retry(response)) {
.log_debug(sprintf(
"Request failed (status code %i): '%s %s'"
, response$status_code
, response$method
, response$url
))
attempt_count <- attempt_count + 1L
} else {
break
}
}
return(response)
}

# [title] Execute an HTTP request and return the result
# [name] .request
# [description] Mainly here to making mocking easier in testing, but this
# also centralizes the mechanism for HTTP request exexcution in one place.
# also centralizes the mechanism for HTTP request execution in one place.
# [references] https://testthat.r-lib.org/reference/local_mocked_bindings.html#namespaced-calls
#' @importFrom httr add_headers RETRY
.request <- function(verb, url, headers, body) {
result <- httr::RETRY(
verb = verb
#' @importFrom curl handle_setheaders handle_setopt new_handle
.request <- function(verb, url, body) {
handle <- curl::new_handle()

# set headers
#
# This can safely be hard-coded here because every payload this library
# posts and every response body it receives is JSON data.
curl::handle_setheaders(
handle = handle
, "Accept" = "application/json" # nolint[non_portable_path]
, "Content-Type" = "application/json" # nolint[non_portable_path]
)

# set HTTP method
curl::handle_setopt(handle = handle, customrequest = verb)

# add body
if (!is.null(body)) {
curl::handle_setopt(
handle = handle
, copypostfields = body
)
}

# actually execute request
response <- .retry(
handle = handle
, url = url
, config = httr::add_headers(.headers = headers)
, body = body
)
return(result)

return(invisible(response))
}

# [title] Raise an exception if an HTTP response indicates an error
# [name] .stop_for_status
# [description] Mainly here to making mocking easier in testing.
# [references] https://testthat.r-lib.org/reference/local_mocked_bindings.html#namespaced-calls
#' @importFrom httr stop_for_status
# [description] 3xx, 4xx, and 5xx responses are treated as errors.
# curl should automatically follow redirects (which is what most
# 3xx responses are), so if that's working well then this code should
# never actually see a 3xx response.
.stop_for_status <- function(response) {
httr::stop_for_status(response)
return(invisible(NULL))
if (response$status_code <= 300L) {
return(invisible(NULL))
}
.log_fatal(sprintf(
"Request failed (status code %i): '%s %s'"
, response$status_code
, response$method
, response$url
))
}
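
Taken together, the rewritten helpers compose the same way the old {httr}-based ones did. A hedged usage sketch is below; these are the internal helpers defined in the diff above, the localhost URL is just a placeholder, and `indices_json` is an illustrative variable name.

response <- .request(
    verb = "GET"
    , url = "http://localhost:9200/_cat/indices?format=json"
    , body = NULL
)
.stop_for_status(response)   # raises a fatal error for non-success status codes
indices_json <- .content(response, as = "text")

Since .retry() hard-codes max_retries = 3L, a request that keeps hitting a retryable status code sleeps roughly 1.45s and then 2.10s (plus jitter) before the final response is handed back to the caller.
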
5 changes: 5 additions & 0 deletions r-pkg/R/logging.R
@@ -1,3 +1,8 @@
#' @importFrom futile.logger flog.debug
.log_debug <- function(...) {
futile.logger::flog.debug(...)
}

#' @importFrom futile.logger flog.info
.log_info <- function(...) {
futile.logger::flog.info(...)