From 9f2335050ffacb038a20ce6f4e1d7176215ee9b6 Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Thu, 31 Aug 2023 17:07:54 +0200 Subject: [PATCH 01/44] initial function to build requests using httr2 --- R/build_request.R | 64 +++++++++++++++++++++++++++++++ R/display_aux.R | 2 +- R/get_aux.R | 2 +- R/get_stats.R | 35 +++++++++-------- R/other.R | 4 +- R/utils.R | 43 +++++++++++++++++---- man/build_request.Rd | 31 +++++++++++++++ man/pip_is_transient.Rd | 18 +++++++++ tests/testthat/test-display_aux.R | 2 +- tests/testthat/test-utils.R | 30 +++++++-------- 10 files changed, 187 insertions(+), 44 deletions(-) create mode 100644 R/build_request.R create mode 100644 man/build_request.Rd create mode 100644 man/pip_is_transient.Rd diff --git a/R/build_request.R b/R/build_request.R new file mode 100644 index 0000000..8576a6b --- /dev/null +++ b/R/build_request.R @@ -0,0 +1,64 @@ +#' build_request +#' +#' @param country +#' @param year +#' @param povline +#' @param popshare +#' @param fill_gaps +#' @param group_by +#' @param welfare_type +#' @param reporting_level +#' @param version +#' @param ppp_version +#' @param release_version +#' @param format +#' @param server +#' +#' @return httr2 request +#' +build_request <- function(country, + year, + povline, + popshare, + fill_gaps, + group_by, + welfare_type, + reporting_level, + version, + ppp_version, + release_version, + format, + server) { + + base_url <- select_base_url(server = server) + + params <- list( + country = country, + year = year, + povline = povline, + popshare = popshare, + fill_gaps = fill_gaps, + group_by = group_by, + welfare_type = welfare_type, + reporting_level = reporting_level, + version = version, + ppp_version = ppp_version, + release_version = release_version, + format = format + ) + + req <- httr2::request(base_url) |> + httr2::req_url_path_append(api_version) |> + httr2::req_url_path_append(endpoint) |> + httr2::req_url_query(!!!params) |> + httr2::req_user_agent(pipr_user_agent) |> + httr2::req_retry( + is_transient = pip_is_transient, + after = retry_after, + max_seconds = 60 + )# |> + #httr2::req_cache(tempdir(), use_on_error = TRUE) + + return(req) + +} diff --git a/R/display_aux.R b/R/display_aux.R index d8e9105..8ac2054 100644 --- a/R/display_aux.R +++ b/R/display_aux.R @@ -32,7 +32,7 @@ display_aux <- function(version = NULL, # ____________________________________________________________________________ # Build query string #### - u <- build_url(server, "aux", api_version = api_version) + u <- build_base_url(server, "aux", api_version = api_version) res <- httr::GET(u) tbs_tb <- parse_response(res, simplify = simplify) tbs <- tbs_tb[["tables"]] diff --git a/R/get_aux.R b/R/get_aux.R index ae70c65..53bb311 100644 --- a/R/get_aux.R +++ b/R/get_aux.R @@ -61,7 +61,7 @@ get_aux <- function(table = NULL, format <- match.arg(format) run_cli <- run_cli() # Build query string - u <- build_url(server, "aux", api_version = api_version) + u <- build_base_url(server, "aux", api_version = api_version) # Return response # If no table is specified, returns list of available tables diff --git a/R/get_stats.R b/R/get_stats.R index f350cf3..73324cf 100644 --- a/R/get_stats.R +++ b/R/get_stats.R @@ -95,23 +95,24 @@ get_stats <- function(country = "all", } # Build query string - args <- build_args( - .country = country, - .year = year, - .povline = povline, - .popshare = popshare, - .fill_gaps = fill_gaps, - .group_by = group_by, - .welfare_type = welfare_type, - .reporting_level = reporting_level, - .version = version, - .ppp_version = ppp_version, - .release_version = release_version, - .format = format + req <- build_request( + country = country, + year = year, + povline = povline, + popshare = popshare, + fill_gaps = fill_gaps, + group_by = group_by, + welfare_type = welfare_type, + reporting_level = reporting_level, + version = version, + ppp_version = ppp_version, + release_version = release_version, + format = format, + server = server ) - u <- build_url(server, endpoint, api_version) - # Send query - res <- httr::GET(u, query = args, httr::user_agent(pipr_user_agent)) + # Perform request + res <- req |> + req_perform() # Parse result out <- parse_response(res, simplify) @@ -146,7 +147,7 @@ get_wb <- function(year = "all", .release_version = release_version, .format = format ) - u <- build_url(server, "pip-grp", api_version) + u <- build_base_url(server, "pip-grp", api_version) # Send query res <- httr::GET(u, query = args, httr::user_agent(pipr_user_agent)) diff --git a/R/other.R b/R/other.R index ba9d3e6..f8d577e 100644 --- a/R/other.R +++ b/R/other.R @@ -24,7 +24,7 @@ check_api <- function(api_version = "v1", server = NULL) { #' } get_versions <- function(api_version = "v1", server = NULL, simplify = TRUE) { check_internet() - u <- build_url(server, "versions", api_version) + u <- build_base_url(server, "versions", api_version) res <- httr::GET(u, httr::user_agent(pipr_user_agent)) parse_response(res, simplify = simplify) } @@ -41,7 +41,7 @@ get_versions <- function(api_version = "v1", server = NULL, simplify = TRUE) { #' } get_pip_info <- function(api_version = "v1", server = NULL) { check_internet() - u <- build_url(server, "pip-info", api_version) + u <- build_base_url(server, "pip-info", api_version) res <- httr::GET(u, httr::user_agent(pipr_user_agent)) parse_response(res, simplify = FALSE)$content } diff --git a/R/utils.R b/R/utils.R index 6df7e68..31d58e3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -9,7 +9,7 @@ check_internet <- function() { #' @inheritParams check_api #' @noRd health_check <- function(api_version, server = NULL) { - u <- build_url(server, "health-check", api_version) + u <- build_base_url(server, "health-check", api_version) res <- httr::GET(u) attempt::stop_if_not( .x = httr::status_code(res), @@ -46,13 +46,13 @@ check_status <- function(res, parsed) { invisible(TRUE) } -#' build_url +#' build_base_url #' @param server character: Server #' @param endpoint character: Endpoint #' @param api_version character: API version #' @inheritParams get_stats #' @noRd -build_url <- function(server, endpoint, api_version) { +build_base_url <- function(server, endpoint, api_version) { base_url <- select_base_url(server = server) sprintf("%s/%s/%s", base_url, api_version, endpoint) } @@ -113,19 +113,19 @@ build_args <- function(.country = NULL, parse_response <- function(res, simplify) { # Get response type - type <- tryCatch(suppressWarnings(httr::http_type(res)), error = function(e) NULL) + type <- tryCatch(suppressWarnings(httr2::resp_content_type(res)), error = function(e) NULL) # Stop if response type is unknown attempt::stop_if(is.null(type), msg = "Invalid response format") if (type == "application/json") { - parsed <- jsonlite::fromJSON(httr::content(res, "text", encoding = "UTF-8")) + parsed <- jsonlite::fromJSON(httr2::resp_body_string(res, encoding = "UTF-8")) } if (type == "text/csv") { - parsed <- suppressMessages(httr::content(res, encoding = "UTF-8")) + parsed <- suppressMessages(httr2::resp_body_string(res, encoding = "UTF-8")) } if (type == "application/rds") { - parsed <- unserialize(res$content) + parsed <- unserialize(res$body) } if (simplify) { @@ -193,3 +193,32 @@ tmp_rename_cols <- function(df, url = "") { return(df) } + +#' pip_is_transient +#' +#' Helper function to determine if an error is due to the number of requests +#' going over the rate limit +#' +#' @param resp +#' +#' @return logical +#' +pip_is_transient <- function(resp) { + if (httr2::resp_is_error(resp)) { + httr2::resp_status(resp) == 429 & + stringr::str_detect(httr2::resp_body_json(resp, check_type = FALSE)$message, + "Rate limit is exceeded") + } else { + FALSE + } +} + +retry_after <- function(resp) { + if (httr2::resp_is_error(resp)) { + time <- httr2::resp_body_json(resp, check_type = FALSE)$message + time <- stringr::str_remove(time, "Rate limit is exceeded. Try again in ") + readr::parse_number(time) + } else { + 0 + } +} diff --git a/man/build_request.Rd b/man/build_request.Rd new file mode 100644 index 0000000..16b22e7 --- /dev/null +++ b/man/build_request.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_request.R +\name{build_request} +\alias{build_request} +\title{build_request} +\usage{ +build_request( + country, + year, + povline, + popshare, + fill_gaps, + group_by, + welfare_type, + reporting_level, + version, + ppp_version, + release_version, + format, + server +) +} +\arguments{ +\item{server}{} +} +\value{ +httr2 request +} +\description{ +build_request +} diff --git a/man/pip_is_transient.Rd b/man/pip_is_transient.Rd new file mode 100644 index 0000000..83f58bf --- /dev/null +++ b/man/pip_is_transient.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{pip_is_transient} +\alias{pip_is_transient} +\title{pip_is_transient} +\usage{ +pip_is_transient(resp) +} +\arguments{ +\item{resp}{} +} +\value{ +logical +} +\description{ +Helper function to determine if an error is due to the number of requests +going over the rate limit +} diff --git a/tests/testthat/test-display_aux.R b/tests/testthat/test-display_aux.R index c6e6fb8..31fcd30 100644 --- a/tests/testthat/test-display_aux.R +++ b/tests/testthat/test-display_aux.R @@ -4,7 +4,7 @@ test_that("returns proper table", { api_version <- "v1" simplify <- TRUE server <- NULL - u <- build_url(server, "aux", api_version = api_version) + u <- build_base_url(server, "aux", api_version = api_version) res <- httr::GET(u) tbs_tb <- parse_response(res, simplify = simplify) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 8ff7f74..c216f0c 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -44,50 +44,50 @@ test_that("check_status() works", { }) -test_that("build_url() works", { +test_that("build_base_url() works", { # Check that url is correctly pasted together - x <- build_url(server = NULL, endpoint = "pip", api_version = "v1") + x <- build_base_url(server = NULL, endpoint = "pip", api_version = "v1") expect_identical(x, paste0(prod_url, "/v1/pip")) - x <- build_url("prod", "pip", api_version = "v1") + x <- build_base_url("prod", "pip", api_version = "v1") expect_identical(x, paste0(prod_url, "/v1/pip")) - x <- build_url("prod", "pip-grp", api_version = "v2") + x <- build_base_url("prod", "pip-grp", api_version = "v2") expect_identical(x, paste0(prod_url, "/v2/pip-grp")) # Expect error if server arg is incorrect - expect_error(build_url("tmp", "pip", "v1")) + expect_error(build_base_url("tmp", "pip", "v1")) # Check internal URLs skip_if(Sys.getenv("PIPR_RUN_LOCAL_TESTS") != "TRUE") - x <- build_url("qa", "pip", "v1") + x <- build_base_url("qa", "pip", "v1") expect_identical(x, paste0(Sys.getenv("PIP_QA_URL"), "/v1/pip")) - x <- build_url("dev", "pip", "v1") + x <- build_base_url("dev", "pip", "v1") expect_identical(x, paste0(Sys.getenv("PIP_DEV_URL"), "/v1/pip")) # Expect error if ENV vars are not found skip_if(Sys.getenv("PIP_QA_URL") != "") - expect_error(build_url("qa", "pip", "v1")) + expect_error(build_base_url("qa", "pip", "v1")) skip_if(Sys.getenv("PIP_DEV_URL") != "") - expect_error(build_url("dev", "pip", "v1")) + expect_error(build_base_url("dev", "pip", "v1")) }) -test_that("build_url() works for internal URLS", { +test_that("build_base_url() works for internal URLS", { # Check internal URLs skip_if(Sys.getenv("PIPR_RUN_LOCAL_TESTS") != "TRUE") - x <- build_url("qa", "pip", "v1") + x <- build_base_url("qa", "pip", "v1") expect_identical(x, paste0(Sys.getenv("PIP_QA_URL"), "/v1/pip")) - x <- build_url("dev", "pip", "v1") + x <- build_base_url("dev", "pip", "v1") expect_identical(x, paste0(Sys.getenv("PIP_DEV_URL"), "/v1/pip")) }) -test_that("build_url() throws error for internal URLs if ENV vars are not found", { +test_that("build_base_url() throws error for internal URLs if ENV vars are not found", { # Expect error if ENV vars are not found skip_if(Sys.getenv("PIP_QA_URL") != "") - expect_error(build_url("qa", "pip", "v1")) + expect_error(build_base_url("qa", "pip", "v1")) skip_if(Sys.getenv("PIP_DEV_URL") != "") - expect_error(build_url("dev", "pip", "v1")) + expect_error(build_base_url("dev", "pip", "v1")) }) test_that("build_args() works for all individual parameters", { From 356aced2865f66d948addd13ebf3259ef3b91465 Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Fri, 1 Sep 2023 11:00:41 +0200 Subject: [PATCH 02/44] replace all references to httr with httr2 --- R/build_request.R | 56 +++++---------------- R/display_aux.R | 7 ++- R/get_aux.R | 26 +++++++--- R/get_stats.R | 33 +++++++------ R/other.R | 12 +++-- R/utils.R | 46 ++++++++++-------- man/build_request.Rd | 18 +------ tests/testthat/test-display_aux.R | 6 ++- tests/testthat/test-get_aux.R | 81 ------------------------------- tests/testthat/test-other.R | 6 +-- 10 files changed, 97 insertions(+), 194 deletions(-) diff --git a/R/build_request.R b/R/build_request.R index 8576a6b..e7dc2f9 100644 --- a/R/build_request.R +++ b/R/build_request.R @@ -1,62 +1,30 @@ #' build_request #' -#' @param country -#' @param year -#' @param povline -#' @param popshare -#' @param fill_gaps -#' @param group_by -#' @param welfare_type -#' @param reporting_level -#' @param version -#' @param ppp_version -#' @param release_version -#' @param format #' @param server +#' @param ... #' #' @return httr2 request #' -build_request <- function(country, - year, - povline, - popshare, - fill_gaps, - group_by, - welfare_type, - reporting_level, - version, - ppp_version, - release_version, - format, - server) { +build_request <- function(server, + api_version, + endpoint, + ...) { base_url <- select_base_url(server = server) - params <- list( - country = country, - year = year, - povline = povline, - popshare = popshare, - fill_gaps = fill_gaps, - group_by = group_by, - welfare_type = welfare_type, - reporting_level = reporting_level, - version = version, - ppp_version = ppp_version, - release_version = release_version, - format = format - ) + params <- list(...) req <- httr2::request(base_url) |> httr2::req_url_path_append(api_version) |> httr2::req_url_path_append(endpoint) |> httr2::req_url_query(!!!params) |> httr2::req_user_agent(pipr_user_agent) |> - httr2::req_retry( - is_transient = pip_is_transient, - after = retry_after, - max_seconds = 60 - )# |> + httr2::req_error(body = parse_error_body) #|> + # httr2::req_retry( + # is_transient = pip_is_transient, + # after = retry_after, + # max_seconds = 60 + # )# |> #httr2::req_cache(tempdir(), use_on_error = TRUE) return(req) diff --git a/R/display_aux.R b/R/display_aux.R index 8ac2054..59e288e 100644 --- a/R/display_aux.R +++ b/R/display_aux.R @@ -31,9 +31,12 @@ display_aux <- function(version = NULL, # ____________________________________________________________________________ # Build query string #### + req <- build_request(server = server, + api_version = api_version, + endpoint = "aux") + res <- req |> + httr2::req_perform() - u <- build_base_url(server, "aux", api_version = api_version) - res <- httr::GET(u) tbs_tb <- parse_response(res, simplify = simplify) tbs <- tbs_tb[["tables"]] if (isTRUE(run_cli)) { diff --git a/R/get_aux.R b/R/get_aux.R index 53bb311..f5ce3f3 100644 --- a/R/get_aux.R +++ b/R/get_aux.R @@ -61,12 +61,15 @@ get_aux <- function(table = NULL, format <- match.arg(format) run_cli <- run_cli() # Build query string - u <- build_base_url(server, "aux", api_version = api_version) + req <- build_request(server = server, + api_version = api_version, + endpoint = "aux") # Return response # If no table is specified, returns list of available tables if (is.null(table)) { - res <- httr::GET(u) + res <- req |> + httr2::req_perform() tables <- parse_response(res, simplify = simplify) cli::cli_text("Auxiliary tables available are") cli::cli_ul(tables$tables) @@ -80,12 +83,19 @@ get_aux <- function(table = NULL, return(invisible(tables)) # If a table is specified, returns that table } else { - args <- build_args(.table = table, - .version = version, - .ppp_version = ppp_version, - .release_version = release_version, - .format = format) - res <- httr::GET(u, query = args, httr::user_agent(pipr_user_agent)) + req <- build_request(server = server, + api_version = api_version, + endpoint = "aux", + table = table, + version = version, + release_version = release_version, + format = format) + # args <- build_args(.table = table, + # .version = version, + # .ppp_version = ppp_version, + # .release_version = release_version, + # .format = format) + res <- httr2::req_perform(req) rt <- parse_response(res, simplify = simplify) } diff --git a/R/get_stats.R b/R/get_stats.R index 73324cf..3b07b5a 100644 --- a/R/get_stats.R +++ b/R/get_stats.R @@ -108,11 +108,13 @@ get_stats <- function(country = "all", ppp_version = ppp_version, release_version = release_version, format = format, - server = server + server = server, + api_version = api_version, + endpoint = endpoint ) # Perform request res <- req |> - req_perform() + httr2::req_perform() # Parse result out <- parse_response(res, simplify) @@ -137,20 +139,21 @@ get_wb <- function(year = "all", format <- match.arg(format) # Build query string - args <- build_args( - .country = "all", - .year = year, - .povline = povline, - .group_by = "wb", - .version = version, - .ppp_version = ppp_version, - .release_version = release_version, - .format = format + req <- build_request( + year = year, + povline = povline, + group_by = "wb", + version = version, + ppp_version = ppp_version, + release_version = release_version, + format = format, + server = server, + api_version = api_version, + endpoint = "pip-grp" ) - u <- build_base_url(server, "pip-grp", api_version) - - # Send query - res <- httr::GET(u, query = args, httr::user_agent(pipr_user_agent)) + # Perform request + res <- req |> + httr2::req_perform() # Parse result out <- parse_response(res, simplify) diff --git a/R/other.R b/R/other.R index f8d577e..a4952db 100644 --- a/R/other.R +++ b/R/other.R @@ -24,8 +24,10 @@ check_api <- function(api_version = "v1", server = NULL) { #' } get_versions <- function(api_version = "v1", server = NULL, simplify = TRUE) { check_internet() - u <- build_base_url(server, "versions", api_version) - res <- httr::GET(u, httr::user_agent(pipr_user_agent)) + req <- build_request(server = server, + api_version = api_version, + endpoint = "versions") + res <- httr2::req_perform(req) parse_response(res, simplify = simplify) } @@ -41,7 +43,9 @@ get_versions <- function(api_version = "v1", server = NULL, simplify = TRUE) { #' } get_pip_info <- function(api_version = "v1", server = NULL) { check_internet() - u <- build_base_url(server, "pip-info", api_version) - res <- httr::GET(u, httr::user_agent(pipr_user_agent)) + req <- build_request(server = server, + api_version = api_version, + endpoint = "pip-info") + res <- httr2::req_perform(req) parse_response(res, simplify = FALSE)$content } diff --git a/R/utils.R b/R/utils.R index 31d58e3..8248034 100644 --- a/R/utils.R +++ b/R/utils.R @@ -9,10 +9,12 @@ check_internet <- function() { #' @inheritParams check_api #' @noRd health_check <- function(api_version, server = NULL) { - u <- build_base_url(server, "health-check", api_version) - res <- httr::GET(u) + req <- build_request(server = server, + api_version = api_version, + endpoint = "health-check") + res <- httr2::req_perform(req) attempt::stop_if_not( - .x = httr::status_code(res), + .x = httr2::resp_status(res), .p = ~ .x == 200, msg = "Could not connect to the API" ) @@ -23,22 +25,15 @@ health_check <- function(api_version, server = NULL) { #' @param res A httr response #' @param parsed A parsed response #' @noRd -check_status <- function(res, parsed) { - if (res$status_code != 200) { - if ("error" %in% names(parsed)) { +check_status <- function(res) { + if (httr2::resp_is_error(res)) { msg1 <- paste( - httr::http_status(res$status_code)$message, - parsed$error, + httr2::resp_status_desc(res), "Use simplify = FALSE to see the full error response.", sep = "\n*\t") - } else { - msg1 <- paste( - httr::http_status(res$status_code)$message, - "Use simplify = FALSE to see the full error response.", - sep = "\n*\t") - } + attempt::stop_if_not( - .x = httr::status_code(res), + .x = httr2::resp_status(res), .p = ~ .x == 200, msg = msg1 ) @@ -129,7 +124,7 @@ parse_response <- function(res, simplify) { } if (simplify) { - check_status(res, parsed) + httr2::resp_check_status(res, info = parsed$message) parsed <- tibble::as_tibble(parsed) # TEMP fix for renaming of columns # To be removed when pipapi#207 @@ -141,7 +136,7 @@ parse_response <- function(res, simplify) { structure( list( url = res$url, - status = res$status_code, + status = res$statusCode, type = type, content = parsed, response = res @@ -205,9 +200,12 @@ tmp_rename_cols <- function(df, url = "") { #' pip_is_transient <- function(resp) { if (httr2::resp_is_error(resp)) { - httr2::resp_status(resp) == 429 & + if (httr2::resp_status(resp) == 429) { stringr::str_detect(httr2::resp_body_json(resp, check_type = FALSE)$message, - "Rate limit is exceeded") + "Rate limit is exceeded") + } else { + FALSE + } } else { FALSE } @@ -222,3 +220,13 @@ retry_after <- function(resp) { 0 } } + +parse_error_body <- function(resp) { + if (httr2::resp_is_error(resp)) { + out <- resp_body_json(resp) + message1 <- out$error[[1]] + message2 <- out$details[[1]]$msg[[1]] + message3 <- paste(unlist(out$details[[names(out$details)]]$valid), collapse = ", ") + message <- c(message1, message2, message3) + } +} diff --git a/man/build_request.Rd b/man/build_request.Rd index 16b22e7..232ad8c 100644 --- a/man/build_request.Rd +++ b/man/build_request.Rd @@ -4,24 +4,10 @@ \alias{build_request} \title{build_request} \usage{ -build_request( - country, - year, - povline, - popshare, - fill_gaps, - group_by, - welfare_type, - reporting_level, - version, - ppp_version, - release_version, - format, - server -) +build_request(server, ...) } \arguments{ -\item{server}{} +\item{...}{} } \value{ httr2 request diff --git a/tests/testthat/test-display_aux.R b/tests/testthat/test-display_aux.R index 31fcd30..c136947 100644 --- a/tests/testthat/test-display_aux.R +++ b/tests/testthat/test-display_aux.R @@ -4,8 +4,10 @@ test_that("returns proper table", { api_version <- "v1" simplify <- TRUE server <- NULL - u <- build_base_url(server, "aux", api_version = api_version) - res <- httr::GET(u) + req <- build_request(server = server, + api_version = api_version, + endpoint = "aux") + res <- httr2::req_perform(req) tbs_tb <- parse_response(res, simplify = simplify) tt <- suppressMessages(display_aux(server = server, simplify = simplify, api_version = api_version)) diff --git a/tests/testthat/test-get_aux.R b/tests/testthat/test-get_aux.R index eee97b0..9939e76 100644 --- a/tests/testthat/test-get_aux.R +++ b/tests/testthat/test-get_aux.R @@ -82,84 +82,3 @@ test_that("get_regions() works", { expect_true(tibble::is_tibble(res)) expect_identical(res, res2) }) - - -# test_that("get_countries() with mocking works", { -# mockery::stub(get_aux, "httr::GET", function(...) { -# readRDS(test_path("testdata", "response-country.RDS")) -# }) -# -# mockery::stub(get_countries, "get_aux", function(...) { -# readRDS(test_path("testdata", "response-country.RDS")) -# }) -# -# res1 <- get_aux("countries") -# res2 <- parse_response(get_countries(), TRUE) -# -# expect_true(tibble::is_tibble(res1)) -# expect_true(tibble::is_tibble(res2)) -# -# expect_equal(dim(res1), dim(res2)) -# expect_identical(res1, res2) -# }) -# -# -# test_that("get_regions() with mocking works", { -# mockery::stub(get_aux, "httr::GET", function(...) { -# readRDS(test_path("testdata", "response-regions.RDS")) -# }) -# -# mockery::stub(get_regions, "get_aux", function(...) { -# readRDS(test_path("testdata", "response-regions.RDS")) -# }) -# -# res1 <- get_aux("regions") -# res2 <- parse_response(get_regions(), TRUE) -# -# expect_true(tibble::is_tibble(res1)) -# expect_true(tibble::is_tibble(res2)) -# -# expect_equal(dim(res1), dim(res2)) -# expect_identical(res1, res2) -# }) -# -# -# test_that("get_cpi() with mocking works", { -# mockery::stub(get_aux, "httr::GET", function(...) { -# readRDS(test_path("testdata", "response-cpi.RDS")) -# }) -# -# mockery::stub(get_cpi, "get_aux", function(...) { -# readRDS(test_path("testdata", "response-cpi.RDS")) -# }) -# -# res1 <- get_aux("cpi") -# res2 <- parse_response(get_cpi(), TRUE) -# -# expect_true(tibble::is_tibble(res1)) -# expect_true(tibble::is_tibble(res2)) -# -# expect_equal(dim(res1), dim(res2)) -# expect_identical(res1, res2) -# }) -# -# test_that("get_dictionary() with mocking works", { -# #Waiting for this PR to be merged https://github.com/worldbank/pipr/pull/43 -# #so that get_dictionary() works -# mockery::stub(get_aux, "httr::GET", function(...) { -# readRDS(test_path("testdata", "response-dictionary.RDS")) -# }) -# -# mockery::stub(get_dictionary, "get_aux", function(...) { -# readRDS(test_path("testdata", "response-dictionary.RDS")) -# }) -# -# res1 <- get_aux("dictionary") -# res2 <- parse_response(get_dictionary(), TRUE) -# -# expect_true(tibble::is_tibble(res1)) -# expect_true(tibble::is_tibble(res2)) -# -# expect_equal(dim(res1), dim(res2)) -# expect_identical(res1, res2) -# }) diff --git a/tests/testthat/test-other.R b/tests/testthat/test-other.R index 98894eb..a55bc90 100644 --- a/tests/testthat/test-other.R +++ b/tests/testthat/test-other.R @@ -5,15 +5,15 @@ test_that("health_check() works", { skip_if_offline() skip_on_cran() res <- health_check(api_version = "v1") - expect_identical(httr::content(res)[[1]], "PIP API is running") + expect_identical(httr2::resp_body_json(res)[[1]], "PIP API is running") expect_equal(res$status_code, 200) expect_invisible(health_check(api_version = "v1")) expect_error(health_check("xx")) skip_if(Sys.getenv("PIPR_RUN_LOCAL_TESTS") != "TRUE") skip_if(is.null(curl::nslookup(dev_host, error = FALSE)), message = "Could not connect to DEV host") - expect_identical(httr::content(health_check(api_version = "v1", server = "dev"))[[1]], "PIP API is running") + expect_identical(httr2::resp_body_json(health_check(api_version = "v1", server = "dev"))[[1]], "PIP API is running") skip_if(is.null(curl::nslookup(qa_host, error = FALSE)), message = "Could not connect to QA host") - expect_identical(httr::content(health_check(api_version = "v1", server = "qa"))[[1]], "PIP API is running") + expect_identical(httr2::resp_body_json(health_check(api_version = "v1", server = "qa"))[[1]], "PIP API is running") }) test_that("get_versions() works", { From 88c8e2d4b62f8b75f5b731bb5385276fb5c06b01 Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Fri, 1 Sep 2023 17:41:43 +0200 Subject: [PATCH 03/44] adjust unit test suite and fix a few issues --- R/build_request.R | 9 ++++ R/get_aux.R | 6 +-- R/utils.R | 60 ++++++++++++++++++--- tests/testthat/test-caching.R | 66 ++++++++++++------------ tests/testthat/test-get_aux.R | 22 ++------ tests/testthat/test-get_stats.R | 13 ----- tests/testthat/test-utils.R | 19 +++---- tests/testthat/testdata/res-ex-csv.RDS | Bin 2273 -> 3928 bytes tests/testthat/testdata/res-ex-json.RDS | Bin 1865 -> 4309 bytes tests/testthat/testdata/res-ex-rds.RDS | Bin 2335 -> 5026 bytes 10 files changed, 105 insertions(+), 90 deletions(-) diff --git a/R/build_request.R b/R/build_request.R index e7dc2f9..9a5c577 100644 --- a/R/build_request.R +++ b/R/build_request.R @@ -13,6 +13,7 @@ build_request <- function(server, base_url <- select_base_url(server = server) params <- list(...) + params <- lapply(params, fix_params) req <- httr2::request(base_url) |> httr2::req_url_path_append(api_version) |> @@ -30,3 +31,11 @@ build_request <- function(server, return(req) } + +fix_params <- function(param) { + if (length(param) > 1) { + return(paste(param, collapse = ",")) + } else { + return(param) + } +} diff --git a/R/get_aux.R b/R/get_aux.R index f5ce3f3..85633b4 100644 --- a/R/get_aux.R +++ b/R/get_aux.R @@ -90,11 +90,7 @@ get_aux <- function(table = NULL, version = version, release_version = release_version, format = format) - # args <- build_args(.table = table, - # .version = version, - # .ppp_version = ppp_version, - # .release_version = release_version, - # .format = format) + res <- httr2::req_perform(req) rt <- parse_response(res, simplify = simplify) } diff --git a/R/utils.R b/R/utils.R index 8248034..3e873a1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -8,7 +8,7 @@ check_internet <- function() { #' health_check #' @inheritParams check_api #' @noRd -health_check <- function(api_version, server = NULL) { +health_check <- function(api_version = "v1", server = NULL) { req <- build_request(server = server, api_version = api_version, endpoint = "health-check") @@ -117,7 +117,9 @@ parse_response <- function(res, simplify) { parsed <- jsonlite::fromJSON(httr2::resp_body_string(res, encoding = "UTF-8")) } if (type == "text/csv") { - parsed <- suppressMessages(httr2::resp_body_string(res, encoding = "UTF-8")) + parsed <- suppressMessages(vroom::vroom( + I(httr2::resp_body_string(res, encoding = "UTF-8"))) + ) } if (type == "application/rds") { parsed <- unserialize(res$body) @@ -136,7 +138,7 @@ parse_response <- function(res, simplify) { structure( list( url = res$url, - status = res$statusCode, + status = res$status_code, type = type, content = parsed, response = res @@ -211,6 +213,15 @@ pip_is_transient <- function(resp) { } } +#' retry_after +#' +#' Helper function to determine how much time to wait before a new +#' query can be sent +#' +#' @param resp +#' +#' @return numeric +#' retry_after <- function(resp) { if (httr2::resp_is_error(resp)) { time <- httr2::resp_body_json(resp, check_type = FALSE)$message @@ -221,12 +232,45 @@ retry_after <- function(resp) { } } +#' parse_error_body +#' +#' Helper function to parse error messages generated by the PIP API +#' +#' @param resp +#' +#' @return character +#' parse_error_body <- function(resp) { if (httr2::resp_is_error(resp)) { - out <- resp_body_json(resp) - message1 <- out$error[[1]] - message2 <- out$details[[1]]$msg[[1]] - message3 <- paste(unlist(out$details[[names(out$details)]]$valid), collapse = ", ") - message <- c(message1, message2, message3) + if (is_gateway_timeout(resp)) { + # Handle gateway timeout + return(httr2::resp_status_desc(resp)) + } else if (is_bad_gateway(resp)) { + # Handle bad gateway timeout + return(httr2::resp_status_desc(resp)) + } else { + # Handle regular PIP errors + out <- httr2::resp_body_json(resp) + message1 <- out$error[[1]] + message2 <- out$details[[1]]$msg[[1]] + message3 <- paste(unlist(out$details[[names(out$details)]]$valid), collapse = ", ") + message <- c(message1, message2, message3) + return(message) + } } } + +is_gateway_timeout <- function(resp) { + httr2::resp_status(resp) == 504 & + httr2::resp_status_desc(resp) == "Gateway Timeout" +} + +is_bad_gateway <- function(resp) { + httr2::resp_status(resp) == 502 & + httr2::resp_status_desc(resp) == "Bad Gateway" +} + + +# handle_gateway_timeout <- function(resp) { +# +# } diff --git a/tests/testthat/test-caching.R b/tests/testthat/test-caching.R index ca55a93..0275c0b 100644 --- a/tests/testthat/test-caching.R +++ b/tests/testthat/test-caching.R @@ -1,33 +1,33 @@ -library(callr) - -test_that("Caching is enabled by default", { - skip_on_cran() - # Setup external R session - r <- callr::r_session$new(options = callr::r_session_options(user_profile = FALSE)) - r$run(function() Sys.setenv("PIPR_DISABLE_CACHING" = "FALSE")) - r$run(function() library(pipr)) - # Check that main functions are cached - tmp <- r$run(function() memoise::is.memoised(get_stats)) - expect_true(tmp) - tmp <- r$run(function() memoise::is.memoised(get_wb)) - expect_true(tmp) - tmp <- r$run(function() memoise::is.memoised(get_aux)) - expect_true(tmp) - r$kill() -}) - -test_that("Caching can be disabled", { - skip_on_cran() - # Setup external R session - r <- callr::r_session$new(options = callr::r_session_options(user_profile = FALSE)) - r$run(function() Sys.setenv("PIPR_DISABLE_CACHING" = "TRUE")) - r$run(function() library(pipr)) - # Check that main functions are NOT cached - tmp <- r$run(function() memoise::is.memoised(get_stats)) - expect_false(tmp) - tmp <- r$run(function() memoise::is.memoised(get_wb)) - expect_false(tmp) - tmp <- r$run(function() memoise::is.memoised(get_aux)) - expect_false(tmp) - r$kill() -}) +# library(callr) +# +# test_that("Caching is enabled by default", { +# skip_on_cran() +# # Setup external R session +# r <- callr::r_session$new(options = callr::r_session_options(user_profile = FALSE)) +# r$run(function() Sys.setenv("PIPR_DISABLE_CACHING" = "FALSE")) +# r$run(function() library(pipr)) +# # Check that main functions are cached +# tmp <- r$run(function() memoise::is.memoised(get_stats)) +# expect_true(tmp) +# tmp <- r$run(function() memoise::is.memoised(get_wb)) +# expect_true(tmp) +# tmp <- r$run(function() memoise::is.memoised(get_aux)) +# expect_true(tmp) +# r$kill() +# }) +# +# test_that("Caching can be disabled", { +# skip_on_cran() +# # Setup external R session +# r <- callr::r_session$new(options = callr::r_session_options(user_profile = FALSE)) +# r$run(function() Sys.setenv("PIPR_DISABLE_CACHING" = "TRUE")) +# r$run(function() library(pipr)) +# # Check that main functions are NOT cached +# tmp <- r$run(function() memoise::is.memoised(get_stats)) +# expect_false(tmp) +# tmp <- r$run(function() memoise::is.memoised(get_wb)) +# expect_false(tmp) +# tmp <- r$run(function() memoise::is.memoised(get_aux)) +# expect_false(tmp) +# r$kill() +# }) diff --git a/tests/testthat/test-get_aux.R b/tests/testthat/test-get_aux.R index 9939e76..ad9f14a 100644 --- a/tests/testthat/test-get_aux.R +++ b/tests/testthat/test-get_aux.R @@ -38,30 +38,14 @@ test_that("get_aux() works when calling specific tables", { expect_true(tibble::is_tibble(res)) # Check failure if table doesn't exist - # TO DO: Use prod server for this test when API has been released - # expect_error(get_aux("tmp")) - # expect_true(is.list(get_aux("tmp", simplify = FALSE))) skip_if(Sys.getenv("PIPR_RUN_LOCAL_TESTS") != "TRUE") expect_error(get_aux("wrong-table-name", server = "qa")) - expect_true(is.list(get_aux("wrong-table-name", simplify = FALSE, server = "prod"))) # Check all tables - skip("survey_metadata gives a 500 error. Need to add functionality for list data") - dl <- lapply(res, function(x) try(get_aux(x))) + # skip("survey_metadata gives a 500 error. Need to add functionality for list data") + dl <- lapply(res$tables, function(x) try(get_aux(x))) expect_true(all(sapply(dl, tibble::is_tibble))) - expect_true(sapply(dl, function(x) any(class(x) != "try-error"))) - # expect_false(sapply(dl, function(x) any(names(x) == "error"))) -}) - -test_that("User agent works", { - skip_if_offline() - skip_on_cran() - # res <- get_aux(simplify = FALSE) - # tmp <- res$response$request$options$useragent - # expect_identical(tmp, pipr_user_agent) - res <- get_aux("gdp", simplify = FALSE) - tmp <- res$response$request$options$useragent - expect_identical(tmp, pipr_user_agent) + expect_true(all(sapply(dl, function(x) any(class(x) != "try-error")))) }) ## Test helper functions ---- diff --git a/tests/testthat/test-get_stats.R b/tests/testthat/test-get_stats.R index 2ee287f..3c8da10 100644 --- a/tests/testthat/test-get_stats.R +++ b/tests/testthat/test-get_stats.R @@ -185,16 +185,3 @@ test_that("get_wb() works w/ simplify = FALSE", { expect_true(is.data.frame(res$content)) expect_gte(nrow(res$content), 3) }) - - -test_that("User agent works", { - skip_if_offline() - skip_on_cran() - res <- get_stats("AGO", 2000, simplify = FALSE) - tmp <- res$response$request$options$useragent - expect_identical(tmp, pipr_user_agent) - - res <- get_wb(2000, simplify = FALSE) - tmp <- res$response$request$options$useragent - expect_identical(tmp, pipr_user_agent) -}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index c216f0c..a735fe4 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -17,7 +17,7 @@ test_that("check_internet() works", { test_that("check_api() works", { skip_if_offline() skip_on_cran() - res <- check_api("v1", server = NULL) + res <- check_api("v1", server = "qa") expect_equal(res, "PIP API is running") }) @@ -26,21 +26,16 @@ test_that("check_status() works", { skip_on_cran() # 200 res <- health_check("v1") - parsed <- parse_response(res, simplify = FALSE)$content - expect_true(check_status(res, parsed)) + expect_true(check_status(res)) # 404 res <- res_ex_404 - parsed <- parse_response(res, simplify = FALSE)$content - expect_error(check_status(res, parsed)) + expect_error(check_status(res)) # 500 res <- res_ex_404 - parsed <- parse_response(res, simplify = FALSE)$content res$status_code <- 500 - parsed$error <- NULL - parsed$details <- NULL - expect_error(check_status(res, parsed)) + expect_error(check_status(res)) }) @@ -204,7 +199,7 @@ test_that("parse_response() works for different formats", { res <- parse_response(res_ex_json, simplify = FALSE) expect_identical(names(res), c("url", "status", "type", "content", "response")) expect_identical(class(res), "pip_api") - expect_identical(class(res$response), "response") + expect_identical(class(res$response), "httr2_response") expect_identical(class(res$content), "data.frame") # csv @@ -213,7 +208,7 @@ test_that("parse_response() works for different formats", { res <- parse_response(res_ex_csv, simplify = FALSE) expect_identical(names(res), c("url", "status", "type", "content", "response")) expect_identical(class(res), "pip_api") - expect_identical(class(res$response), "response") + expect_identical(class(res$response), "httr2_response") expect_true(all(class(res$content) %in% c("spec_tbl_df", "tbl_df", "tbl", "data.frame"))) # rds @@ -222,7 +217,7 @@ test_that("parse_response() works for different formats", { res <- parse_response(res_ex_rds, simplify = FALSE) expect_identical(names(res), c("url", "status", "type", "content", "response")) expect_identical(class(res), "pip_api") - expect_identical(class(res$response), "response") + expect_identical(class(res$response), "httr2_response") expect_true(all(class(res$content) %in% c("data.table", "data.frame"))) }) diff --git a/tests/testthat/testdata/res-ex-csv.RDS b/tests/testthat/testdata/res-ex-csv.RDS index f40b1e2ea29c014a8e6838ffe5ac53d4c0e1b89a..a4450387fdc5fca2e0315131335474ef7a14e0d5 100644 GIT binary patch literal 3928 zcmbVPU2ogS6_pcblX{ybMS*VnP?SJ{1SlwZzCUajg*S0h1I;FlvOs`72qSVN6QM{h zNjcH>xljF1eeTccALzDcC^B}m*+p6d(rE6?y>sum=iV9r_V0H_-QQ<>r}zHxuh$2d z+!*cO*c;uriJv!rfv;Qm+{b5cv_JYu5I#M7wIkYGv~ATK9Ui(Wo4hUSJe|AZ&7`cC zhgDV`UeYl9At~2ITW?NIpFV%E@vc5`dH!IL<@s#ss^;X$>7S-&58nEG;c7o?HAKHH7ti`Ktu4`KK?N-kX8`1a)MS5L-f$L8(iVqLuP>5ll|jrY~q<=Lg*S$PFy zW*thCq%7ZL{?YT7(-UTt7d*A;d@hY;ntP%b#Hu6}f-Vd@j$XL-;^go+n!1&rX01Q@ zsw_OFK3UhhXV~2{KH2_`v%f3KanqKykE7MPX~(s{L`Po2K=vqsYz6b$b-OZNdj%57$NG7Ji%+c~~5izfpeH1@p|2SII;K@fe!pC-#e{5yPMVKgy zPw{9crw7y1XJ^k}otkQ8q=kSnP;D2VrWz@OAC$oh9n3>O4J3vUGe&k5Wen zla!C=shN)j3S(>+oQyTs785Q6Vq)gFp`h^2*4r$8DVvtOIJATxa@efMwJ_=QWU1)h_-f#QtMeUX= z*U#<`UhEur`|Mqn)d*nQZ+|^)>kKUE{0qHs+*NXHu1{40$*tshCGdzS&?CH?&|DY7m^aEK0!mG{mA-}_oL`X*^jCpbw8SZ zwEaj)oK!fWs;)s)mDgv>^!ltye4I723{_DFzgnNhY1V*v^L6mAyN+WdKEu5`%n#6k z$5V%E(iPF^0wi_ubb1<}7E73jam~k}VaG+M;_~>9Urw(kVQIoBU@n^Vs-tF1whC-Q zWgHd-F-B;uB*#E0WW+*fg(XxfrL0y#COPMfOG}7gf^!WSm6ULbOVt<%jnc86a4oo! zf+-^?!O3Exl#t9)iYX%^*q4+^t%M@bgnn>CQY#g1XMv5(bSDZWxFAFdTuKTnB6vZd z!JtWnFmSMQ1IJhyE-q*rt68^^bGRv`&`c3@u;4DnZXd!BRvTr)!2}^Ptp#GCM9D6E zO@z@>Tca&Zvu+zgG{Q&%6qM1ODn(zkIKu)Xir@nvu&S(tvlNI}2ALqi4@7Awp*W?p zMg_H2Sz%TzKJ!)Ig=tU;Y|2G(M`0)PzM zLAd7sB@Q&-H<$CrFV9Y|rVM5$2>sqU2lQ@ng zjIHcOvWU>Accx~yaZh*C-Ln@fFT5eaJNy8C0OFl@9s+Mc;V1AX9KNdVo}C%5k>Ld- zVrg7cr*7wb=R0T4*=JjwPG_~V)>-eYuH$QGllB1QdRlR z_V(F}zwUO`+SE zqn$3ywB%!xL2zrKO{Hue8Hy5my(jPViM_ z(@S|T&ysl9F5f!u)zD{o6=!MLh~GW$wdY*;EUn&Z?O(j>t@$I3?44F-(_uD3>^1W5 zdWCVz<-(HX!z3zO%U4?L5i>R|<0?LFA6QG`vcmWhpWAi(uH5!XXd#{#!2ZrBhlfwM zX`g-@=ikQpe>2Xnee-`5+AFQGZx+o9_^dYT8)X$$(^lEdQ(Kf!zMY=fXaq@p-beOk zr!G1=uOC<6uE#4?+T)jsd1ZMs>yXhGr4_Qc3%fY!Wm616a`_edohTHMV4Mn+UYQZ$B--Axj#Bzr(JHG z%{{X{2BQ?^9e%aV0fIsZa*2FHDGIiiU<5 z&IOJMGz|*36K!-z74;$`q&AEj+^Ch)i%TAILIlT8uY}m?KyZNJAmv~ff|hJ+IX#r z=33xdAft?QAW__dCya!4QhJflQcHt-IE8Z5UB`*jg9@Q3p;RzgYo`TlfW8dRC>++L zZ$b(OXwH}hl<NO8;45(nPZBiBoxk4E+o~`y%6GwkO~5?0y|YBiV=tcJq1icVHINp#TJb1vdmwG3GEL=8L+9!9(Wejg z=cz+=pQ;7!u=+^N9biM?aBZB@NDa<*#uRb+g&+gKBR-+f5a>$4CM6-P4ysTpmn00+ zE>ls#K^}s*hCek>)S=M+8A2Io=i<0M>5>`%2XmqZl?(gcCQR-vq)@07H@9 zKu_;J=m8HQ%i(ssK|OOAhcgXy2I&yVqKRa92GAm(P>FOXYdUd;$VTP@XAP+{jex71 znQ&OBlqaZXgJc4c%>;5+ooTQM^mn-d`uHdl?wk_Hs7eas6OBPS1-}s~NXA0>5Rk|_ zxJn}NG9ZMAG9jYT1rLzmcf#n-?4<3Ikg_xIhe9{>KYzg#NZHrgJsK#@?`nMPZ#1WK~(`81!E z@^yFARP-9)uoR7=A3DyyPosYYEi=)rQ2`M>pdvXyz&NR;Pj6nva-|GY$4 zcYSwsK1qV^{F)cs+rGDicYDvq{%~mXwzPiPbnz=oyY3|M5h~?mTlFO>iEh#JGu{vT ztP79|boIQ?nxgz#{aJXgCEWFC=}XbHnspDr_}I>A;Jx!Y!oGEes?rW7c9Iosm-Kcm z8&A`rFWz76V`go^#8xNSsP#B(Sngq(edE|BqglVR*sEWM=JM~{o)-Nqy{#<#@4F~z)BuYn#y~4z;SB<(+cWu6LxZ2Je;MibM vgz(Cq$98V&yUi;a<#p7;m-|{H8$?vGA`yB z5ekfCDJy6)8%N}fL~>EeVq6Tq&(j1vxg(rwwSKD{=FToV{KaENJDP#C5y>>uFf9!^ zedP5WN85YSJjy1{vWNz_plPyI8Cc$IJvb?oQxZ08Y@8C3={TZiq%m_0cQO|D(+$$} zl#<Oa5xXcTkkux|`hzk(FRDgq%rtO<=k+02O(X6rkJ4K=s zJG5-~_m@ zO`>$t*C69aKFpou znX5{k z!-J#!z5S<+ne`;i6FQwX#J3?~7?~2r2y09sfNwt>1T@soP(Q?mVLHgtCt+`*+W>~{ zPc1|DEYF95W!t9Nf?@@msM>TL0z>j@*UhS6tv!PLt^WUD7Y<3_oX1dKD%nTJ3@14_ zqz{uI4WZOj6C0q+!vv*3Nk*j5cLm08mE76olzk zl)(XHrTu(VFbY{B{Nq(n7xdF&UMYO4_EB>EFooi!rtdz}AB(g&$oLu5wx2u`&Z%3_ z&(GdHs$$dF>xleN5-1761`c?SQOmB5K`EGD{<_tK^k3^cb;$>K(k_ zwEMh@@NJb;bQG|jh;ol3Sdy3QjLgS4U}-XsVeXrUqlGv~&j6OfLeT5M8Kg#!3lOqu4POYHo~JaB!6i177`XLJoW(#k zIF)5^bI9ftNm)Y#Ou%3GEH9@kr=sVy{W{=GA(}Z5+lptC|`z=iCiB!yaFteNnm6dHpJbB<%J?cu$2=%%MQszB4g5X@p5xNpbklwICgCm6eCAiLUt!FT$0-s@L~*_418Lagv7B=vv9tllXRC;G)dO w{KZzyuB)e_eE+UZ(r_-fT~*tvx8;ZOZ3&O%3?5eeOX-_me*NR#Yxu7IH?l^%Jpcdz literal 1865 zcmV-P2e$YhiwFP!000002JM(nZzH!Az~%q7H=CqbAW07m214O27Sd>j^G~CNVJL~S zYarV-VjmV*6aqa%NmElZQx3|K*-oR;SZk~|HX3Ui@NZ-D{UZ}O2!!{bC3@e4&kAU4!}phC z&eL9}bMb;+hG~=XZdsr$Rbh|<9br`g_z{f~|hb}tAWVnzmhni8qZNSZJn#iIcw=Y&exFkutS_x_M2 zG1zlg+SX|NwXk=)X78_0#(7IKJ?#l*8Zr>ljBd|HJ;&7^Jvpf8Btx82-p*K{J!`rz)zA* zGfpNw4WA}C?`apA_`sk0d7R-PX-BaL%l&zqoMXCZh0_>MqTv2REelwbs#VI1eD6dL zwIGRO65L=Q_&gciL~Y3Mge*}68~Kzj>$y5b65qt4+BWSR_n#a*{rK?F;qPiaTXB-b z(QsH3--(kpBSXR%VYRL|ahlR7z&uLgPLWgP{`Jv|D6Ov&jPv-6KnAR@mc$#dMII(TUrNJ=EqNC<{Kyju#UsXW|SV){jJUUkk ztkWpt@cjxt_h#U(-jl~q1>{PwfARR__YIo-1Rw&I!xFc-0&e9D-yvULw8Kn7uO^*MG0QKKov3;?~jA1`xWvP6Mfp#pN#}tNSIXfrQ0S;IaPbbj#_`TzW zI7lD`;M83@X0e8j%vQ7f1O$D^-YlQU|~X|AvuhAl>mT8`?A!dX9{JEkmp| z6Cf!-NVP55w!{)Mfh`5LquP#aJFcq)t`vC2T*H%sZr4GzG+t}LWWnR$gr4D~PXQWE-BRd!Bn0BUn^g@BYuQWZs^o4?aHV-;yJ|MvAqU;SnO;KDslw2*_X4LD?md42??_5r3ODp$D!Gq3&TENvjoBbdlsj6yT6~COU*0gqMbP7cZ z?RdHi4^oY_Wy{tr9cd6H)-iQkM@2ofGW%54QyI6NXY!zo`Mhw0Adg7N0=YfcW$bGg za88lIgiI2q3e?wUvSA(vvTnLsLe49)3E|@;R9g-{)XT?0Zx|68&P&Z|nY@E@J|6Jo zjHnWAGs$_H^J~hkEk^#B6II%-z15igO0}`@xc*r>%^eZ)!5GIOB}-h~x$4xNS-~tX zzvX`3fg~GLMZi~b7Lig@7Wzwmqu7AcXQ8K@EFz-pTBqkTOGC;>Di{C& D$%dM; diff --git a/tests/testthat/testdata/res-ex-rds.RDS b/tests/testthat/testdata/res-ex-rds.RDS index 6bbeed40ac631e0ea700ad2d352c4fee6819c209..3ed461e7fac5594036729d7b7ac0531bf18e8c32 100644 GIT binary patch literal 5026 zcmeHLeQ*@z8Q1A-P`waw-YRm z|8=~X{dk_=yU+W)&-=X3$9-dcK4boJv({JWA1*EgBAan$XEE6hcy(03EeEc0xUv{0 zQ(!PV+InrQM4zrJT60YeR)p$)Srz#ZmX=q`YNSRHl$v<0@j#p$lXNxV^>=hlNf4}h zu_#Un3!)f|U`6w``@b7#o6=9jFjh%WPbkD3pb|w^bwP>*MG_}s@*`P|V%@9q8Yog? z>c%jqEPV}K)AgO)t_m|)cP$$r3hQxu>e$+u&7S&ZkDKk7XJ1JfRuoa-urA0_O;Uck ztURtSCM_qtZHmI>geWd73UOjXdch(=(>m3hEH4*GYgbRe>uH=t>g)I>J`|eS*yL%b zBkqQSH7#raM@c}?iFdv%5g-W{Q|)s!+2^=r`8H*%B-y3u zvPx#K(U_*YR1$|Z@mL22Kpf`!>~rVegztMl*?*y$__E5OZcNR&g(EchGI5}Hn; z%`6VdF}<1X*Nh8pQ)7~b!^9;>q9Bo}DcUf@D8(lv92K~!Ev&|=fA3GRq&HB27-=yLL3XRsR$xu=~gtd?P+1aKd-H;r?aDTz75Hf zWK9ynVH>*$97ZK!qN+r-LA7x9I(aWA@UD1mGmqVF&&=97SBP&6x$5CyxSGOsZdXHH zLlY2n_4OVP1EC6p(8!bn8$*nckPHFIaOIo>{%eJ<;#OEBfpzvmbg_8jdQ~iGppdIg z;$$8IjRh3KU}Kvg5>QTn=&n}t7~hmr(rTO%7zLqD7Oi2q-71bmu|<_rEo>9!wXIMD z6&8?Gd{RJH1@ID6KO+XZ%+oLE32VUZR1W34x@Cx66fRrrYB$}YOEK0!#ZFjkDo3s0 z%0$b0ti7?x>LK5c5eNz9lq+<*;W-g&)bMmSN+9g1o?surJW&ns23!*vrubPi%{Ulm zRyN}>vI~>5u(#dSXvOboz%Vut2AKI~JREpe0QX_oC)OVZn@~f3F(gx}BM|Ug6WCuf z2j=>vh%C}6$EGsBa6p2iqHV7XQ);Ztx77y^hVgBE(0Z;jpKwcGdA?-kAxz!q=5*$F zj=A>+ECmWW?``XL&FgHXjGWFnJ#GFkF%ezBTZLX_hi_|$F5nlH1MkhX$t~i*?_yEY z|15?67t6AcDHU$$qvV^9GBQ`{G%|xF*?4apeXj+7vaFp|Xa0P)C&kg3h_CxU+h?NV zhh7^wReS}VyOOst`0=AD{6Kii@e>OlMmtVjo-!%tXXwa=yFYy{ZbTl_7--LY?D)aS zm1xt|s!P$$kDwvPnKc)B_NVY)FJyl6#6zpmp4goao__Nsbdq(V?e5np?wh>wft_pb zne4lxwDRhL38#Ix{q5kDhYZn{{^Nf>u;A4N>wM#1|6u>_X3hV{~crk=TXZ?~UTmbY*YC)6j)QYc?JqK8@b(n{~(acZO2<#REql zX!zzY(fMNb_hoB#qD%KYa{uM%b18gnRbl-0*+Xb^!%vF7cFl>_H$SuTY-N24e{A65 zVP$Y1+E(+znQy(AK#yOXIAPhHZ>R8WlXE`4Sol2JIdNlSS5+0-R<5pI^_@j2e3x^= zAKvg)qCL}o8}D0s4ek8)vhMple^23qH@aRLZ1kX^qsQaBOQxg2p9;J7PaIC+`-&1v zmRvuAUT)vJAk;LBUVLYM+q`3j=Op)M|As>k>u>a-gRjiE>$WQ!(EgWO?*F>$-eeUL(870 zxNphXj#Ug(V1H8FFVA`Z7!ZDa?a`aY&>|I>|As99!xsQ04w=n)i)XAbSw01ET|JGtJvyl z(ubvZY2!*q`xvA5u>_Wla?S3>VwWcE7{p+FDG{ZEEiNa=3Dgr*gCdKaCbl@ei?Y71 zOnn_yAeDv$akP>s=j0?)BtcCfB275OiZPX*2paXUC8xkh=;op=MiCtZU7W=#rz{Ox zHg$BKoPMn9v=DJd38s}*Zj|ssigH9n-jd{u2$EpEgEk^og#j}uTD&}-a8S|LW|E#v zvM!UX&m?DNlCv_&hD?$s7;D}p3(dFGpt;DwDO78RqENGxjPAA~kHL)aV=xttTE0xs zf~F4}cF>^-O`euXHHp4m5BKp7Nl0`$o#w(aQAQ}dP6f93RI}(zA}KbTilwubCR-)w{S+vejsI9~ymh_ezV#Ih^I!a;pNRkf literal 2335 zcmV+)3E=i0iwFP!000002JIMYa1+N_wk!+F2Ae|7BcVCoF0~{}7;NM^D+LU=E!c@& zpwLvTq?L5>>F#&usMhr};5* z7={#T34U@gM*)7XPD&CV@Os-inf4e@^A$c%8z(Ta2xMD4oRIKBp7yr-yqE-{oXko> zM{Q_M!=w%bg<8lklX$L`p;=Vx^URnOrx_-kfV@~cJM^8Vx=C$_i9-Q}r4Amc%mU&$ zL8942m_e2L7!K!FB+i^|Y0*km~nv-%l<*TCRL4opD`2AExb)eE8 zm{vv2nb&M#E>FRBH%y?~=~FAJ{1p`yH5AQ8899cU81!-2Sp?Co?9E_RA<}8V! zIFuR5EecU;3Pr`B1Orrr;UWPFtBJ%8Q9`Kz1tXj+1*kSrdGOz4nH6CixoK9h%48iz zt&qt;u?`ld=;-7cN{k9LPgXH5GT=ltRFq>`6upaqSm#vYZrI`iOrb1{SVpOX$wv9? z6dZdu5?S2jY6{J(Yj~u7PW^lf#mRCaOUL6DbRo;R1r$eufCLMv81g(rN1;S>tT!D} zMsi-Fljg09M3XYxf^Y!jfq>>)(P7m)E-Eu?*q5+#Ag9*_jcib`0!MHH0LJcnf?uNe*6qe=WXsVb@MUhvB}tj<~BDs zdVL<>z5KqH-~WsJ&bj?1O6gpp?A>Lt$v$?Co-ayJl8La+RwRg6cmgI7jA2ZAyqeKY zTe`@w-T7@Fl743u(b?G0RKF-Hk>yp8Y%`XLOaeQ}h?tuBM7=lziJ9_vC?$2Z6b}+3 zxKKM!3u;smUS<0y8zN3g8CiD80)cDukTYU%7KQZiS5n2J#)YQENbKD$i>!5;CK5aI| z|MuX9plY<>hNas3Cz^Ln6AfZjw?O7|VT%BBA8I15s(+}i(LJwzCP6srXFXD9)$xlg zL=-shs^wn}Zio;CelxuD-MN-|3vtqla*8(u*XN^kT}`Hv7C$u;yglKk3;&w>flgmJ-qkwd!QEhO)vCh# zZe9ng0~?<>HSANJ{zB*F9=>}g*yP>u$FJ`i2i9F4S^DVxuj%wnV;mn}c5MJ#N3N-E z7=IFMDiNMs{;erG{gu4Zb0kJsa#ie6;n~ zMSlR@zo1{)H|k}b{#s$j;>CUIz@FK!J{+0xGqCH*{JMEZB07EFv%h^_I?03md&|du z_4+}uZ%@s_ucb2b^$fmsXmHmVaJcR9Lw~OA0zK<@j*JLh8UHt2+_G}hHqiUVg3h_i zUjn`HzucG-yrI)C{O)w!uxsyuGdn(6^!EpU4laDZeWwOKhx91vzuU=Yn z=&9k2!S2%O>xZr?CG;oM9CB_BzSua^vv$>);Ks#$SI%x$%0ibh*Rtf716OhaHvnKg zFlC_?bqomIMZujuHY6BfkO`w5BmqB-F+ww$+OC8k859-`jbX(O5+WuV8l#okJt8Y+F?zfhi4vWH8T0_R{Ch!vFX+DvL0=+b z*Fc8uMB#Mv)n>l2*wYv5e(k1~6ME~}o(Vyl?F-)6%!wv-axAhglsdJ@@?7= zjP4n|ZS2P{g8NQ9GtT|60LoVE=sj_w2>ib9!{mEFf})CVs%zUO}=T9TF$0^}q45YgVYv+NApbYOv=TMXNA2 zQyvpJww#hsyX0+8G3AtzrXr?zr&#Pw=E)C5qlhPW^nR@km}}vh#Ly9ZG9jr1zAq|F|9`eu0R&mZBXBcoQjPT$RL60ojV0=Q5)Ww zP&kEBoIp0HA!%G(W~15_%-Rsq`wJ-~B{`CVcC3Rgh1Lww_~JBTVtV7m-1KolSxSZ_ zt_A5EAty_`EZw1r;1tjgWkeb^cBNcXG<0M%wmw;>DO5}zPC_=upe!o7KC4t=y8UB3 z&@6fu;us5?Q8>N078)5qwQWUqc0 F005DqdT{^% From e5a0a228f34a6d021b56b496654062da601c5909 Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Fri, 1 Sep 2023 17:47:24 +0200 Subject: [PATCH 04/44] add documentation --- man/build_request.Rd | 2 +- man/parse_error_body.Rd | 17 +++++++++++++++++ man/retry_after.Rd | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 man/parse_error_body.Rd create mode 100644 man/retry_after.Rd diff --git a/man/build_request.Rd b/man/build_request.Rd index 232ad8c..d3a805e 100644 --- a/man/build_request.Rd +++ b/man/build_request.Rd @@ -4,7 +4,7 @@ \alias{build_request} \title{build_request} \usage{ -build_request(server, ...) +build_request(server, api_version, endpoint, ...) } \arguments{ \item{...}{} diff --git a/man/parse_error_body.Rd b/man/parse_error_body.Rd new file mode 100644 index 0000000..f8be1fd --- /dev/null +++ b/man/parse_error_body.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{parse_error_body} +\alias{parse_error_body} +\title{parse_error_body} +\usage{ +parse_error_body(resp) +} +\arguments{ +\item{resp}{} +} +\value{ +character +} +\description{ +Helper function to parse error messages generated by the PIP API +} diff --git a/man/retry_after.Rd b/man/retry_after.Rd new file mode 100644 index 0000000..49bd284 --- /dev/null +++ b/man/retry_after.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{retry_after} +\alias{retry_after} +\title{retry_after} +\usage{ +retry_after(resp) +} +\arguments{ +\item{resp}{} +} +\value{ +numeric +} +\description{ +Helper function to determine how much time to wait before a new +query can be sent +} From 80c5bec1358089d40ab508ec841045122914f90e Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Fri, 1 Sep 2023 17:49:56 +0200 Subject: [PATCH 05/44] add httr2 and R minimum version to support native pipe --- DESCRIPTION | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a577330..2fd7132 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,7 +41,6 @@ Suggests: rmarkdown, markdown, callr, - mockery, ggplot2, tidyr, ggthemes, @@ -53,17 +52,15 @@ Language: en-US Imports: attempt, curl, - httr, jsonlite, tibble, purrr, - memoise, - cachem, data.table, cli, rlang, - utils + utils, + httr2 Depends: - R (>= 3.6.0) + R (>= 4.1.0) Config/testthat/edition: 3 Date: 2023-04-28 From 23fc955bc9dd40366a9056ff72182b82ed405272 Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Fri, 1 Sep 2023 18:35:21 +0200 Subject: [PATCH 06/44] fix documentation --- DESCRIPTION | 4 +++- R/build_request.R | 4 +++- man/build_request.Rd | 6 ++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2fd7132..ebb4155 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -59,7 +59,9 @@ Imports: cli, rlang, utils, - httr2 + httr2, + stringr, + vroom Depends: R (>= 4.1.0) Config/testthat/edition: 3 diff --git a/R/build_request.R b/R/build_request.R index 9a5c577..2442d3a 100644 --- a/R/build_request.R +++ b/R/build_request.R @@ -1,6 +1,8 @@ #' build_request #' -#' @param server +#' @param server character: Server. For WB internal use only +#' @param api_version character: API version +#' @param endpoint character: PIP API endpoint #' @param ... #' #' @return httr2 request diff --git a/man/build_request.Rd b/man/build_request.Rd index d3a805e..0dfd41b 100644 --- a/man/build_request.Rd +++ b/man/build_request.Rd @@ -7,6 +7,12 @@ build_request(server, api_version, endpoint, ...) } \arguments{ +\item{server}{character: Server. For WB internal use only} + +\item{api_version}{character: API version} + +\item{endpoint}{character: PIP API endpoint} + \item{...}{} } \value{ From 8eff3da17207973f4b39e8bdc7051ff5b82a1fee Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Fri, 1 Sep 2023 18:35:54 +0200 Subject: [PATCH 07/44] disable ad-hoc caching (to be replaced with httr2 cache mechanism) --- R/zzz.R | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index f6a0d22..1387f4a 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,23 +1,23 @@ -.onLoad <- function(libname, pkgname) { - if (!Sys.getenv("PIPR_DISABLE_CACHING") == "TRUE") { - # d <- rappdirs::user_cache_dir("pipr") - # cm <- cachem::cache_disk(d, - # evict = "lru", - # max_size = 512 * 1024^2) - cm <- cachem::cache_mem(max_size = 512 * 1024^2, evict = "lru") - get_stats <<- memoise::memoise(get_stats, cache = cm) - get_wb <<- memoise::memoise(get_wb, cache = cm) - get_aux <<- memoise::memoise(get_aux, cache = cm) - get_versions <<- memoise::memoise(get_versions, cache = cm) - } - - options(cli.ignore_unknown_rstudio_theme = TRUE) - -} - -.onAttach <- function(libname, pkgname) { - if (!Sys.getenv("PIPR_DISABLE_CACHING") == "TRUE") { - packageStartupMessage("Info: Session based caching is enabled.") - } -} +# .onLoad <- function(libname, pkgname) { +# if (!Sys.getenv("PIPR_DISABLE_CACHING") == "TRUE") { +# # d <- rappdirs::user_cache_dir("pipr") +# # cm <- cachem::cache_disk(d, +# # evict = "lru", +# # max_size = 512 * 1024^2) +# cm <- cachem::cache_mem(max_size = 512 * 1024^2, evict = "lru") +# get_stats <<- memoise::memoise(get_stats, cache = cm) +# get_wb <<- memoise::memoise(get_wb, cache = cm) +# get_aux <<- memoise::memoise(get_aux, cache = cm) +# get_versions <<- memoise::memoise(get_versions, cache = cm) +# } +# +# options(cli.ignore_unknown_rstudio_theme = TRUE) +# +# } +# +# .onAttach <- function(libname, pkgname) { +# if (!Sys.getenv("PIPR_DISABLE_CACHING") == "TRUE") { +# packageStartupMessage("Info: Session based caching is enabled.") +# } +# } From 104f8873cfa9255548d9dbcb42addeee45f9a12f Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Fri, 1 Sep 2023 18:37:29 +0200 Subject: [PATCH 08/44] Add automatic retry to handle throttling --- R/build_request.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/build_request.R b/R/build_request.R index 2442d3a..2249ebe 100644 --- a/R/build_request.R +++ b/R/build_request.R @@ -22,12 +22,12 @@ build_request <- function(server, httr2::req_url_path_append(endpoint) |> httr2::req_url_query(!!!params) |> httr2::req_user_agent(pipr_user_agent) |> - httr2::req_error(body = parse_error_body) #|> - # httr2::req_retry( - # is_transient = pip_is_transient, - # after = retry_after, - # max_seconds = 60 - # )# |> + httr2::req_error(body = parse_error_body) |> + httr2::req_retry( + is_transient = pip_is_transient, + after = retry_after, + max_seconds = 60 + )# |> #httr2::req_cache(tempdir(), use_on_error = TRUE) return(req) From b22f8c7b403b0e741690feb1238c0218c7f8fa86 Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Fri, 1 Sep 2023 18:38:09 +0200 Subject: [PATCH 09/44] add response caching --- R/build_request.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/build_request.R b/R/build_request.R index 2249ebe..2857fe1 100644 --- a/R/build_request.R +++ b/R/build_request.R @@ -27,8 +27,8 @@ build_request <- function(server, is_transient = pip_is_transient, after = retry_after, max_seconds = 60 - )# |> - #httr2::req_cache(tempdir(), use_on_error = TRUE) + ) |> + httr2::req_cache(tempdir(), use_on_error = TRUE) return(req) From 8d14b16527f86aa25ffd3e59acd28a77dd5ae958 Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Fri, 1 Sep 2023 18:58:17 +0200 Subject: [PATCH 10/44] disable httr2 cache. Not working as expected --- R/build_request.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/build_request.R b/R/build_request.R index 2857fe1..ff74e21 100644 --- a/R/build_request.R +++ b/R/build_request.R @@ -27,8 +27,10 @@ build_request <- function(server, is_transient = pip_is_transient, after = retry_after, max_seconds = 60 - ) |> - httr2::req_cache(tempdir(), use_on_error = TRUE) + ) #|> + # httr2::req_cache(rappdirs::user_cache_dir("pipr"), + # use_on_error = TRUE, + # debug = TRUE) return(req) From 6c897f5376c41b5079b934d2d9c7f78006bb6d74 Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Mon, 4 Sep 2023 15:08:11 +0200 Subject: [PATCH 11/44] add back caching with new mechanism to handle cache path --- R/build_request.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/build_request.R b/R/build_request.R index ff74e21..475e268 100644 --- a/R/build_request.R +++ b/R/build_request.R @@ -21,16 +21,16 @@ build_request <- function(server, httr2::req_url_path_append(api_version) |> httr2::req_url_path_append(endpoint) |> httr2::req_url_query(!!!params) |> + httr2::req_cache(tools::R_user_dir("pipr", which = "cache"), + use_on_error = TRUE, + debug = TRUE) |> httr2::req_user_agent(pipr_user_agent) |> httr2::req_error(body = parse_error_body) |> httr2::req_retry( is_transient = pip_is_transient, after = retry_after, max_seconds = 60 - ) #|> - # httr2::req_cache(rappdirs::user_cache_dir("pipr"), - # use_on_error = TRUE, - # debug = TRUE) + ) return(req) From c767e489fbb336a3b6496999a6382a092c0ffd89 Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Tue, 5 Sep 2023 14:55:14 +0200 Subject: [PATCH 12/44] Increment version number to 1.1.0 --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ebb4155..74630c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: pipr Title: Client for the Poverty and Inequality Platform ('PIP') API -Version: 1.0.0 +Version: 1.1.0 Authors@R: c(person(given = "Tony", family = "Fujs", diff --git a/NEWS.md b/NEWS.md index a3798ea..ce4311b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# pipr 1.1.0 + # pipr 1.0.0 * [Mock live API calls or skip them on CRAN](https://github.com/worldbank/pipr/pull/45) From 3130a4d8719cecb4e406904dfc0130100e4ce706 Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Tue, 5 Sep 2023 15:44:36 +0200 Subject: [PATCH 13/44] add helper functions to deal with cache --- NAMESPACE | 2 ++ NEWS.md | 8 ++++++++ R/utils.R | 46 ++++++++++++++++++++++++++++++++++++++++--- man/delete_cache.Rd | 17 ++++++++++++++++ man/get_cache_info.Rd | 17 ++++++++++++++++ 5 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 man/delete_cache.Rd create mode 100644 man/get_cache_info.Rd diff --git a/NAMESPACE b/NAMESPACE index 9a730e2..36d4135 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,8 +2,10 @@ export(call_aux) export(check_api) +export(delete_cache) export(display_aux) export(get_aux) +export(get_cache_info) export(get_pip_info) export(get_stats) export(get_versions) diff --git a/NEWS.md b/NEWS.md index ce4311b..901afe3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,13 @@ # pipr 1.1.0 +* [Use httr2](https://github.com/worldbank/pipr/pull/70) + * API responses are now cached locally according to the PIP API cache policy + from the PIP API responses headers + * `pipr` automatically handles retries when hitting the PIP API rate limiting + threshold + * Improved translation of HTTP errors into R error messages +* New helper functions `delete_cache()` and `get_cache_info()` + # pipr 1.0.0 * [Mock live API calls or skip them on CRAN](https://github.com/worldbank/pipr/pull/45) diff --git a/R/utils.R b/R/utils.R index 3e873a1..92b6909 100644 --- a/R/utils.R +++ b/R/utils.R @@ -270,7 +270,47 @@ is_bad_gateway <- function(resp) { httr2::resp_status_desc(resp) == "Bad Gateway" } +#' Deletes content of the cache folder +#' +#' +#' @return Side effect. Deletes files. +#' +#' @export +#' +#' @examples \dontrun{delete_cache()} +delete_cache <- function() { + + cached_files <- list.files(tools::R_user_dir("pipr", which = "cache"), + full.names = TRUE) -# handle_gateway_timeout <- function(resp) { -# -# } + if (length(cached_files) == 0) { + message("Cache is empty. Nothing to delete") + } else { + lapply(cached_files, file.remove) + message("All items have been deleted from the cache.") + } +} + +#' Provides some information about cached items +#' +#' +#' @return character. +#' +#' @export +#' +#' @examples +#' \dontrun{get_cache_info()} +get_cache_info <- function() { + + cache_path <- tools::R_user_dir("pipr", which = "cache") + n_cached <- length(list.files(cache_path)) + + if (n_cached > 1) { + message_text <- " API responses are currently cached in " + } else { + message_text <- " API response is currently cached in " + } + + message(cli::format_message(c("Cache status:", + "i" = paste0(n_cached, message_text, cache_path)))) +} diff --git a/man/delete_cache.Rd b/man/delete_cache.Rd new file mode 100644 index 0000000..7f004bd --- /dev/null +++ b/man/delete_cache.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{delete_cache} +\alias{delete_cache} +\title{Deletes content of the cache folder} +\usage{ +delete_cache() +} +\value{ +Side effect. Deletes files. +} +\description{ +Deletes content of the cache folder +} +\examples{ +\dontrun{delete_cache()} +} diff --git a/man/get_cache_info.Rd b/man/get_cache_info.Rd new file mode 100644 index 0000000..e09c361 --- /dev/null +++ b/man/get_cache_info.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_cache_info} +\alias{get_cache_info} +\title{Provides some information about cached items} +\usage{ +get_cache_info() +} +\value{ +character. +} +\description{ +Provides some information about cached items +} +\examples{ +\dontrun{get_cache_info()} +} From 31e364cfb59222715d79f2d85edb1b10d487a67f Mon Sep 17 00:00:00 2001 From: tonyfujs Date: Thu, 14 Sep 2023 15:58:45 +0200 Subject: [PATCH 14/44] make arrow default format --- R/get_stats.R | 2 +- R/utils.R | 4 ++++ man/get_stats.Rd | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/R/get_stats.R b/R/get_stats.R index 3b07b5a..34319fe 100644 --- a/R/get_stats.R +++ b/R/get_stats.R @@ -69,7 +69,7 @@ get_stats <- function(country = "all", ppp_version = NULL, release_version = NULL, api_version = "v1", - format = c("rds", "json", "csv"), + format = c("arrow", "rds", "json", "csv"), simplify = TRUE, server = NULL) { # Match args diff --git a/R/utils.R b/R/utils.R index 92b6909..c7b0541 100644 --- a/R/utils.R +++ b/R/utils.R @@ -113,6 +113,10 @@ parse_response <- function(res, simplify) { # Stop if response type is unknown attempt::stop_if(is.null(type), msg = "Invalid response format") + if (type == "application/vnd.apache.arrow.file") { + parsed <- arrow::read_feather(res$body) + } + if (type == "application/json") { parsed <- jsonlite::fromJSON(httr2::resp_body_string(res, encoding = "UTF-8")) } diff --git a/man/get_stats.Rd b/man/get_stats.Rd index 921f9f9..9752fbc 100644 --- a/man/get_stats.Rd +++ b/man/get_stats.Rd @@ -18,7 +18,7 @@ get_stats( ppp_version = NULL, release_version = NULL, api_version = "v1", - format = c("rds", "json", "csv"), + format = c("arrow", "rds", "json", "csv"), simplify = TRUE, server = NULL ) From fa1af89fd864f541523260aeae4d57c2685b2874 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Thu, 26 Sep 2024 13:07:43 -0400 Subject: [PATCH 15/44] check() passed --- DESCRIPTION | 5 +++-- R/build_request.R | 2 +- R/utils.R | 6 +++--- man/build_request.Rd | 2 +- man/parse_error_body.Rd | 2 +- man/pip_is_transient.Rd | 2 +- man/retry_after.Rd | 2 +- tests/testthat/test-get_aux.R | 1 + 8 files changed, 12 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 74630c7..fc15dab 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,7 @@ License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 URL: https://worldbank.github.io/pipr/, https://github.com/worldbank/pipr, @@ -49,7 +49,8 @@ Suggests: dplyr, readr Language: en-US -Imports: +Imports: + arrow, attempt, curl, jsonlite, diff --git a/R/build_request.R b/R/build_request.R index 475e268..b148732 100644 --- a/R/build_request.R +++ b/R/build_request.R @@ -3,7 +3,7 @@ #' @param server character: Server. For WB internal use only #' @param api_version character: API version #' @param endpoint character: PIP API endpoint -#' @param ... +#' @param ... other parameters #' #' @return httr2 request #' diff --git a/R/utils.R b/R/utils.R index c7b0541..02e39f7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -200,7 +200,7 @@ tmp_rename_cols <- function(df, url = "") { #' Helper function to determine if an error is due to the number of requests #' going over the rate limit #' -#' @param resp +#' @param resp A httr response #' #' @return logical #' @@ -222,7 +222,7 @@ pip_is_transient <- function(resp) { #' Helper function to determine how much time to wait before a new #' query can be sent #' -#' @param resp +#' @param resp A httr response #' #' @return numeric #' @@ -240,7 +240,7 @@ retry_after <- function(resp) { #' #' Helper function to parse error messages generated by the PIP API #' -#' @param resp +#' @param resp A httr response #' #' @return character #' diff --git a/man/build_request.Rd b/man/build_request.Rd index 0dfd41b..96c72d2 100644 --- a/man/build_request.Rd +++ b/man/build_request.Rd @@ -13,7 +13,7 @@ build_request(server, api_version, endpoint, ...) \item{endpoint}{character: PIP API endpoint} -\item{...}{} +\item{...}{other parameters} } \value{ httr2 request diff --git a/man/parse_error_body.Rd b/man/parse_error_body.Rd index f8be1fd..bf50e2d 100644 --- a/man/parse_error_body.Rd +++ b/man/parse_error_body.Rd @@ -7,7 +7,7 @@ parse_error_body(resp) } \arguments{ -\item{resp}{} +\item{resp}{A httr response} } \value{ character diff --git a/man/pip_is_transient.Rd b/man/pip_is_transient.Rd index 83f58bf..738bcd1 100644 --- a/man/pip_is_transient.Rd +++ b/man/pip_is_transient.Rd @@ -7,7 +7,7 @@ pip_is_transient(resp) } \arguments{ -\item{resp}{} +\item{resp}{A httr response} } \value{ logical diff --git a/man/retry_after.Rd b/man/retry_after.Rd index 49bd284..805973f 100644 --- a/man/retry_after.Rd +++ b/man/retry_after.Rd @@ -7,7 +7,7 @@ retry_after(resp) } \arguments{ -\item{resp}{} +\item{resp}{A httr response} } \value{ numeric diff --git a/tests/testthat/test-get_aux.R b/tests/testthat/test-get_aux.R index ad9f14a..7b54e58 100644 --- a/tests/testthat/test-get_aux.R +++ b/tests/testthat/test-get_aux.R @@ -43,6 +43,7 @@ test_that("get_aux() works when calling specific tables", { # Check all tables # skip("survey_metadata gives a 500 error. Need to add functionality for list data") + res <- get_aux() dl <- lapply(res$tables, function(x) try(get_aux(x))) expect_true(all(sapply(dl, tibble::is_tibble))) expect_true(all(sapply(dl, function(x) any(class(x) != "try-error")))) From ddee18b82f0043e48f9ab702a2ec6d88c0fe287d Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Fri, 27 Sep 2024 12:35:43 -0400 Subject: [PATCH 16/44] # PR 63 added, check() passed --- DESCRIPTION | 1 - R/aaa.R | 17 +++++----- R/get_aux.R | 7 +++-- R/utils.R | 77 +++++++++++++++++++++++++++++++++++++++++----- man/call_aux.Rd | 4 +-- man/get_aux.Rd | 5 +-- man/rename_cols.Rd | 22 +++++++++++++ man/set_aux.Rd | 4 +-- 8 files changed, 113 insertions(+), 24 deletions(-) create mode 100644 man/rename_cols.Rd diff --git a/DESCRIPTION b/DESCRIPTION index fc15dab..cebea23 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,7 +56,6 @@ Imports: jsonlite, tibble, purrr, - data.table, cli, rlang, utils, diff --git a/R/aaa.R b/R/aaa.R index 84d15a3..8c85037 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -1,23 +1,24 @@ -.pip <- new.env(parent = emptyenv()) +.pip <- new.env(parent = emptyenv()) # PR 63 +.pipcache <- new.env(parent = emptyenv()) # PR 63 #' Set auxiliary table in .pip environment for later call #' #' @param table character: name of the table in .pip env #' @param value data to be saved -#' @inheritParams get_aux +#' @param replace logical. #' #' @return Invisible TRUE if set correctly. FALSE otherwise #' @keywords internal set_aux <- function(table, value, - force = FALSE) { + replace = FALSE) { # PR 63 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## Evaluate if exists -------- to_set <- 1 if (rlang::env_has(.pip, table)) { - if (force == FALSE) { + if (isFALSE(replace)) { # PR 63 cli::cli_alert("Table {.field {table}} already exists.") to_set <- utils::menu(c("Replace with new table", "Abort")) } @@ -65,12 +66,12 @@ set_aux <- function(table, #' @examples #' # call one table #' -#' get_aux("gdp", assign_tb = TRUE, force = TRUE) +#' get_aux("gdp", assign_tb = TRUE, replace = TRUE) # PR 63 #' call_aux("gdp") #' #' # see the name of several tables in memory #' tb <- c("cpi", "ppp", "pop") -#' lapply(tb, get_aux, assign_tb = TRUE, force = TRUE) +#' lapply(tb, get_aux, assign_tb = TRUE, replace = TRUE) # PR 63 #' call_aux() call_aux <- function(table = NULL) { @@ -119,7 +120,9 @@ call_aux <- function(table = NULL) { return(rlang::env_get(.pip, table)) } else { msg <- c("*" = "Table {.field {table}} does not exist") - cli::cli_abort(msg, wrap = TRUE) + cli::cli_abort(msg, + + wrap = TRUE) } } diff --git a/R/get_aux.R b/R/get_aux.R index 85633b4..e792254 100644 --- a/R/get_aux.R +++ b/R/get_aux.R @@ -13,7 +13,8 @@ #' assigned to exactly the same name as the one of the desired table. If #' character, the table will be assigned to that name. #' @inheritParams get_stats -#' @param force logical: force replacement. Default is FALSE +#' @param replace logical: force replacement of aux files in `.pip` env. Default +#' is FALSE. #' #' @return If `simplify = FALSE`, it returns a list of class "pip_api". If #' `simplify = TRUE`, it returns a tibble with the requested data. This is the @@ -54,7 +55,7 @@ get_aux <- function(table = NULL, simplify = TRUE, server = NULL, assign_tb = FALSE, - force = FALSE) { + replace = FALSE) { # Match args api_version <- match.arg(api_version) @@ -113,7 +114,7 @@ get_aux <- function(table = NULL, srt <- set_aux(table = tb_name, value = rt, - force = force) + replace = replace) if (isTRUE(srt)) { diff --git a/R/utils.R b/R/utils.R index 02e39f7..03ade8b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,8 @@ +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Parsing and checking functions ------------- +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# PR 63 + #' check_internet #' @noRd check_internet <- function() { @@ -179,22 +184,80 @@ select_base_url <- function(server) { return(base_url) } +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Formatting functions ------------- +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +# PR 63 +#' rename columns in dataframe +#' +#' @param df data frame +#' @param oldnames character: old names +#' @param newnames character: new names +#' +#' @return data frame with new names +#' @keywords internal +rename_cols <- function(df, oldnames, newnames) { + + # _______________________________________ + # Defenses #### + stopifnot( exprs = { + is.data.frame(df) + length(oldnames) == length(newnames) + # all(oldnames %in% names(df)) + } + ) + + # ___________________________________________ + # Computations #### + df_names <- names(df) + + old_position <- which(oldnames %in% df_names) + old_available <- oldnames[old_position] + new_available <- newnames[old_position] + + tochange <- vector(length = length(old_available)) + + for (i in seq_along(old_available)) { + tochange[i] <- which(df_names %in% old_available[i]) + } + + names(df)[tochange] <- new_available + + + # ____________________________________________ + # Return #### + return(df) + +} + #' Rename columns #' TEMP function to rename response cols #' @param df A data.frame #' @param url response url #' @noRd tmp_rename_cols <- function(df, url = "") { - df <- data.table::setnames( - df, - old = c("survey_year", "reporting_year", "reporting_pop", "reporting_gdp", "reporting_pce", "pce_data_level"), - new = c("welfare_time", "year", "pop", "gdp", "hfce", "hfce_data_level"), - skip_absent = TRUE - ) + # PR 63 + oldnames = c( + "survey_year", + "reporting_year", + "reporting_pop", + "reporting_gdp", + "reporting_pce", + "pce_data_level" + ) - return(df) + newnames = c("welfare_time", + "year", + "pop", + "gdp", + "hfce", + "hfce_data_level") + + rename_cols(df,oldnames, newnames) } + #' pip_is_transient #' #' Helper function to determine if an error is due to the number of requests diff --git a/man/call_aux.Rd b/man/call_aux.Rd index 6487b0e..37bfbd4 100644 --- a/man/call_aux.Rd +++ b/man/call_aux.Rd @@ -19,11 +19,11 @@ call a table from .pip env \examples{ # call one table -get_aux("gdp", assign_tb = TRUE, force = TRUE) +get_aux("gdp", assign_tb = TRUE, replace = TRUE) # PR 63 call_aux("gdp") # see the name of several tables in memory tb <- c("cpi", "ppp", "pop") -lapply(tb, get_aux, assign_tb = TRUE, force = TRUE) +lapply(tb, get_aux, assign_tb = TRUE, replace = TRUE) # PR 63 call_aux() } diff --git a/man/get_aux.Rd b/man/get_aux.Rd index 60209ed..76d78f1 100644 --- a/man/get_aux.Rd +++ b/man/get_aux.Rd @@ -27,7 +27,7 @@ get_aux( simplify = TRUE, server = NULL, assign_tb = FALSE, - force = FALSE + replace = FALSE ) get_countries( @@ -170,7 +170,8 @@ If \code{FALSE} no assignment will performed. If \code{TRUE}, the table will be assigned to exactly the same name as the one of the desired table. If character, the table will be assigned to that name.} -\item{force}{logical: force replacement. Default is FALSE} +\item{replace}{logical: force replacement of aux files in \code{.pip} env. Default +is FALSE.} } \value{ If \code{simplify = FALSE}, it returns a list of class "pip_api". If diff --git a/man/rename_cols.Rd b/man/rename_cols.Rd new file mode 100644 index 0000000..9443ad3 --- /dev/null +++ b/man/rename_cols.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{rename_cols} +\alias{rename_cols} +\title{rename columns in dataframe} +\usage{ +rename_cols(df, oldnames, newnames) +} +\arguments{ +\item{df}{data frame} + +\item{oldnames}{character: old names} + +\item{newnames}{character: new names} +} +\value{ +data frame with new names +} +\description{ +rename columns in dataframe +} +\keyword{internal} diff --git a/man/set_aux.Rd b/man/set_aux.Rd index 2d097fe..9d09400 100644 --- a/man/set_aux.Rd +++ b/man/set_aux.Rd @@ -4,14 +4,14 @@ \alias{set_aux} \title{Set auxiliary table in .pip environment for later call} \usage{ -set_aux(table, value, force = FALSE) +set_aux(table, value, replace = FALSE) } \arguments{ \item{table}{character: name of the table in .pip env} \item{value}{data to be saved} -\item{force}{logical: force replacement. Default is FALSE} +\item{replace}{logical.} } \value{ Invisible TRUE if set correctly. FALSE otherwise From 7638f4c387a42781c92a48caede673c9866df8fd Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Wed, 2 Oct 2024 12:04:52 -0400 Subject: [PATCH 17/44] new function `get_grouped_stats()` and support `build_request_v2()` --- R/build_request_v2.R | 40 ++++++++++ R/get_grouped_stats.R | 121 ++++++++++++++++++++++++++++++ grouped-stats_body_checks.R | 145 ++++++++++++++++++++++++++++++++++++ 3 files changed, 306 insertions(+) create mode 100644 R/build_request_v2.R create mode 100644 R/get_grouped_stats.R create mode 100644 grouped-stats_body_checks.R diff --git a/R/build_request_v2.R b/R/build_request_v2.R new file mode 100644 index 0000000..61b51f3 --- /dev/null +++ b/R/build_request_v2.R @@ -0,0 +1,40 @@ +#' Build request version 2 +#' +#' @param server character: Server. For WB internal use only +#' @param api_version character: API version +#' @param endpoint character: PIP API endpoint +#' @param ... +#' +#' @return httr2 request +#' +build_request_v2 <- function(server, + api_version, + endpoint, + ...) { + + + + base_url <- select_base_url(server = server) + params <- list(...) + + req <- httr2::request(base_url) |> + httr2::req_url_path_append(api_version) |> + httr2::req_url_path_append(endpoint) |> + # .multi = "comma" works fine without applying fix_params + httr2::req_url_query(!!!params, .multi = "comma") |> + httr2::req_cache(tools::R_user_dir("pipr", which = "cache"), + use_on_error = TRUE, + debug = TRUE) |> + httr2::req_user_agent(pipr_user_agent) |> + httr2::req_error(body = parse_error_body) |> + httr2::req_retry( + is_transient = pip_is_transient, + after = retry_after, + max_seconds = 60 + ) + + + return(req) + +} + diff --git a/R/get_grouped_stats.R b/R/get_grouped_stats.R new file mode 100644 index 0000000..3c9bae4 --- /dev/null +++ b/R/get_grouped_stats.R @@ -0,0 +1,121 @@ + +#' Get grouped stats +#' +#' Get grouped stats from the PIP API. +#' @inheritParams get_stats +#' @param cum_welfare numeric: Cumulative welfare values. +#' @param cum_population numeric: Cumulative population values. +#' @param endpoint character: One of "grouped-stats", "lorenz-curve", "regression-params". +#' @param requested_mean numeric: Requested mean. +#' @param povline numeric: Poverty line. Required for endpoint = "grouped-stats". +#' @param n_bins numeric: Number of bins. Required for endpoint = "lorenz-curve". + +get_grouped_stats <- function(cum_welfare = NULL, + cum_population = NULL, + endpoint = c("grouped-stats", "lorenz-curve", "regression-params"), + requested_mean = NULL, # grouped-stats specific + povline = NULL, # grouped-stats specific + #lorenz = NULL, # lorenz-curve specific (not working for now) + n_bins = NULL, # lorenz-curve specific + api_version = "v1", + format = c("rds", "json", "csv"), # arrow does not work. + simplify = TRUE, + server = NULL) { + + # 0. Match args ------------------------------------------------------------- + endpoint <- match.arg(endpoint) + api_version <- match.arg(api_version) + format <- match.arg(format) + + # 0. General Args checks (Error is general otherwise) ----------------------- + if (length(cum_welfare) != length(cum_population)) { + cli::cli_abort("{.val cum_welfare} and {.val cum_population} must have the same length.") + } + + # 1. endpoint = grouped-stats ------------------------------------------------ + if (endpoint == "grouped-stats") { + + # 1.1 grouped-stats args checks ------ + if (is.null(requested_mean)) { + cli::cli_abort("For endpoint {endpoint}, {.val requested_mean} must be provided.") + } + if (is.null(povline)) { + cli::cli_abort("For endpoint {endpoint}, {.val povline} must be provided.") + } + + + # 1.2 Build request for grouped-stats ------ + req <- build_request_v2( + cum_welfare = cum_welfare, + cum_population = cum_population, + requested_mean = requested_mean, + povline = povline, + format = format, + server = server, + api_version = api_version, + endpoint = endpoint + ) + + # 1.3 Perform request ------ + res <- req |> + httr2::req_perform() + + # 1.4 Parse result ------ + out <- parse_response(res, simplify) + + # 1.5 Return ------ + return(out) + } + + # 2. endpoint = lorenz-curve ------------------------------------------------ + if (endpoint == "lorenz-curve") { + + + # 2.1 Build request for lorenz-curve ------ + req <- build_request_v2( + cum_welfare = cum_welfare, + cum_population = cum_population, + #lorenz = lorenz, + n_bins = n_bins, + format = format, + server = server, + api_version = api_version, + endpoint = endpoint + ) + + # 2.2 Perform request ------ + res <- req |> + httr2::req_perform() + + # 2.3 Parse result ------ + out <- parse_response(res, simplify) + + # 2.4 Return ------ + return(out) + } + + # 3. endpoint = regress-params ----------------------------------------------- + if (endpoint == "regression-params") { + + + # 3.2 Build request for regress-params + req <- build_request_v2( + cum_welfare = cum_welfare, + cum_population = cum_population, + format = format, + server = server, + api_version = api_version, + endpoint = endpoint + ) + + # 3.3 Perform request ------ + res <- req |> + httr2::req_perform() + + # 3.4 Parse result ------ + out <- parse_response(res, simplify) + + # 3.5 Return ------ + return(out) + } +} diff --git a/grouped-stats_body_checks.R b/grouped-stats_body_checks.R new file mode 100644 index 0000000..3d065e0 --- /dev/null +++ b/grouped-stats_body_checks.R @@ -0,0 +1,145 @@ +# Grouped stats body checks + +# Notes: +# 1. Error when cum_welfare != cum_population is not specific, it just says ! HTTP 404 Not Found. -> Should I block this? +# 2. Error when format = "arrow", when format rds and json I get a tibble with 10 rows, when format = csv I get 1 row (+ cols) (format is set to NULL in pipapi) +# 3. I don't think I can send other parameters to the endpoint, popshare doesn't work, nor any other parameter not declared in the endpoint. +# 4. Note that without mean and povline it fails (so there is not a default.) +# 5. I cannot supply the lorenz fit to the endpoint because in the endpoint it is passed with the other parameters to "as.numeric" and it makes it fail. + +# 1. Body checks ----- +## 1. Args ---- +api_version = "v1" +format = "rds" +simplify = TRUE +server = NULL +endpoint = "grouped-stats" + +cum_welfare = c(0.0002,0.0006,0.0011,0.0021,0.0031,0.0048,0.0066,0.0095,0.0128,0.0177,0.0229,0.0355,0.0513,0.0689,0.0882) +cum_population = c(0.001,0.003,0.005,0.009,0.013,0.019,0.025,0.034,0.044,0.0581,0.0721,0.1041,0.1411,0.1792, 0.3) +requested_mean = 2 +povline = 1.9 +lorenz = "lq" +n_bins = NULL +# popshare = 0.5 +#format = "csv" +#format = "json" +#format = "arrow" + +## 2. Match args ---- +#api_version <- match.arg(api_version) +#format <- match.arg(format) + +## 3. Args checks ---- +if (length(cum_welfare) != length(cum_population)) { + cli::cli_abort("{.val cum_welfare} and {.val cum_population} must have the same length.") +} + + + +## 4.1 Request grouped-stats ---- +endpoint = "grouped-stats" +req_gd <- build_request_v2( + cum_welfare = cum_welfare, + cum_population = cum_population, + requested_mean = requested_mean, + povline = povline, + format = format, + server = server, + api_version = api_version, + endpoint = endpoint + ) + +res <- req_gd |> + httr2::req_perform() + +parse_response(res, simplify = simplify) + +## 4.2 Request lorenz-curve ---- +endpoint = "lorenz-curve" +req_lorenz <- build_request_v2( + cum_welfare = cum_welfare, + cum_population = cum_population, + #lorenz = lorenz, + n_bins = n_bins, + #format = format, + server = server, + api_version = api_version, + endpoint = endpoint +) + + +res <- req_lorenz |> + httr2::req_perform() + +parse_response(res, simplify = simplify) + + +## 4.3 Request regression-params ---- +endpoint = "regression-params" +req_lreg <- build_request_v2( + cum_welfare = cum_welfare, + cum_population = cum_population, + #format = format, + server = server, + api_version = api_version, + endpoint = endpoint +) + + +# 5. Perform request +res <- req_reg |> + httr2::req_perform() + +parse_response(res, simplify = simplify) + + + +# 2. Function checks ----- +cum_welfare = c(0.0002,0.0006,0.0011,0.0021,0.0031,0.0048,0.0066,0.0095,0.0128,0.0177,0.0229,0.0355,0.0513,0.0689,0.0882) +cum_population = c(0.001,0.003,0.005,0.009,0.013,0.019,0.025,0.034,0.044,0.0581,0.0721,0.1041,0.1411,0.1792, 0.3) + + +## grouped_stats ----- +get_grouped_stats(cum_population = cum_population, + cum_welfare = cum_welfare, + povline = 1.9, + requested_mean = 2.1, + endpoint = "grouped-stats") + +### errors ---- +get_grouped_stats(cum_population = cum_population, + cum_welfare = cum_welfare, + povline = NULL, + requested_mean = 2.1, + endpoint = "grouped-stats") + +get_grouped_stats(cum_population = cum_population, + cum_welfare = cum_welfare, + povline = 1.9, + requested_mean = NULL, + endpoint = "grouped-stats") + +cum_population = c(0.001,0.003,0.005,0.009,0.013,0.019,0.025,0.034,0.044,0.0581,0.0721,0.1041,0.1411,0.1792) +get_grouped_stats(cum_population = cum_population, + cum_welfare = cum_welfare, + povline = 1.9, + requested_mean = 2.1, + endpoint = "grouped-stats") + + +## lorenz-curve ---- +get_grouped_stats(cum_population = cum_population, + cum_welfare = cum_welfare, + n_bins = 13, + endpoint = "lorenz-curve") + + + + +## regress-params ----- +get_grouped_stats(cum_population = cum_population, + cum_welfare = cum_welfare, + endpoint = "regression-params") + + From 668e7d29dfe28b1326d38273c472e664594f5db9 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Thu, 3 Oct 2024 12:27:56 -0400 Subject: [PATCH 18/44] changes proposed --- R/get_grouped_stats.R | 4 ++-- grouped-stats_body_checks.R | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/get_grouped_stats.R b/R/get_grouped_stats.R index 3c9bae4..d0ecc03 100644 --- a/R/get_grouped_stats.R +++ b/R/get_grouped_stats.R @@ -12,13 +12,13 @@ get_grouped_stats <- function(cum_welfare = NULL, cum_population = NULL, - endpoint = c("grouped-stats", "lorenz-curve", "regression-params"), + endpoint = c("grouped-stats", "lorenz-curve", "regression-params"), # TO-DO: estimate (stats, lorenz, params) requested_mean = NULL, # grouped-stats specific povline = NULL, # grouped-stats specific #lorenz = NULL, # lorenz-curve specific (not working for now) n_bins = NULL, # lorenz-curve specific api_version = "v1", - format = c("rds", "json", "csv"), # arrow does not work. + format = c("rds", "json", "csv"), # TO-DO: arrow does not work. -> use data.table to pivot simplify = TRUE, server = NULL) { diff --git a/grouped-stats_body_checks.R b/grouped-stats_body_checks.R index 3d065e0..8bce547 100644 --- a/grouped-stats_body_checks.R +++ b/grouped-stats_body_checks.R @@ -105,7 +105,8 @@ get_grouped_stats(cum_population = cum_population, cum_welfare = cum_welfare, povline = 1.9, requested_mean = 2.1, - endpoint = "grouped-stats") + endpoint = "grouped-stats", + format = "csv") ### errors ---- get_grouped_stats(cum_population = cum_population, From 82859fa2985af7cfb1f0f62c961e5dc358a9e9e5 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Fri, 4 Oct 2024 06:46:49 -0400 Subject: [PATCH 19/44] last changes (estimate instead of endpoint, pivoted result). --- R/get_grouped_stats.R | 29 +++++++++++++++-------------- R/utils.R | 16 ++++++++++++++++ 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/R/get_grouped_stats.R b/R/get_grouped_stats.R index d0ecc03..09dc705 100644 --- a/R/get_grouped_stats.R +++ b/R/get_grouped_stats.R @@ -12,18 +12,18 @@ get_grouped_stats <- function(cum_welfare = NULL, cum_population = NULL, - endpoint = c("grouped-stats", "lorenz-curve", "regression-params"), # TO-DO: estimate (stats, lorenz, params) - requested_mean = NULL, # grouped-stats specific - povline = NULL, # grouped-stats specific + estimate = c("stats", "lorenz", "params"), # TO-DO: estimate (stats, lorenz, params) + requested_mean = 1, # grouped-stats specific + povline = 1, # grouped-stats specific #lorenz = NULL, # lorenz-curve specific (not working for now) n_bins = NULL, # lorenz-curve specific api_version = "v1", - format = c("rds", "json", "csv"), # TO-DO: arrow does not work. -> use data.table to pivot + format = c("rds", "json", "csv"), # TO-DO: arrow does not work. -> use data.table to pivot and return in .rds format. simplify = TRUE, server = NULL) { # 0. Match args ------------------------------------------------------------- - endpoint <- match.arg(endpoint) + estimate <- match.arg(estimate) api_version <- match.arg(api_version) format <- match.arg(format) @@ -33,7 +33,9 @@ get_grouped_stats <- function(cum_welfare = NULL, } # 1. endpoint = grouped-stats ------------------------------------------------ - if (endpoint == "grouped-stats") { + if (estimate == "stats") { + + endpoint <- "grouped-stats" # 1.1 grouped-stats args checks ------ if (is.null(requested_mean)) { @@ -63,13 +65,12 @@ get_grouped_stats <- function(cum_welfare = NULL, # 1.4 Parse result ------ out <- parse_response(res, simplify) - # 1.5 Return ------ - return(out) } # 2. endpoint = lorenz-curve ------------------------------------------------ - if (endpoint == "lorenz-curve") { + if (estimate == "lorenz") { + endpoint <- "lorenz-curve" # 2.1 Build request for lorenz-curve ------ req <- build_request_v2( @@ -90,13 +91,12 @@ get_grouped_stats <- function(cum_welfare = NULL, # 2.3 Parse result ------ out <- parse_response(res, simplify) - # 2.4 Return ------ - return(out) } # 3. endpoint = regress-params ----------------------------------------------- - if (endpoint == "regression-params") { + if (estimate == "params") { + endpoint <- "regression-params" # 3.2 Build request for regress-params req <- build_request_v2( @@ -115,7 +115,8 @@ get_grouped_stats <- function(cum_welfare = NULL, # 3.4 Parse result ------ out <- parse_response(res, simplify) - # 3.5 Return ------ - return(out) } + + return(out) + } diff --git a/R/utils.R b/R/utils.R index 03ade8b..78de80a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -120,10 +120,12 @@ parse_response <- function(res, simplify) { if (type == "application/vnd.apache.arrow.file") { parsed <- arrow::read_feather(res$body) + # GC: right now arrow not working with grouped-stats, so I won't pivot. } if (type == "application/json") { parsed <- jsonlite::fromJSON(httr2::resp_body_string(res, encoding = "UTF-8")) + parsed <- change_grouped_stats_to_csv(parsed) # GC: used to pivot. } if (type == "text/csv") { parsed <- suppressMessages(vroom::vroom( @@ -132,6 +134,7 @@ parse_response <- function(res, simplify) { } if (type == "application/rds") { parsed <- unserialize(res$body) + parsed <- change_grouped_stats_to_csv(parsed) # GC: used to pivot. } if (simplify) { @@ -381,3 +384,16 @@ get_cache_info <- function() { message(cli::format_message(c("Cache status:", "i" = paste0(n_cached, message_text, cache_path)))) } + + +#' Change the list-output to dataframe (Function from pipapi) +#' +#' @param out output from wbpip::gd_compute_pip_stats +#' +#' @return dataframe +#' @export +change_grouped_stats_to_csv <- function(out) { + out[paste0("decile", seq_along(out$deciles))] <- out$deciles + out$deciles <- NULL + data.frame(out) +} From 7769f4a65b709a894d05a6579029df446d4d8d5e Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Mon, 7 Oct 2024 15:02:42 -0400 Subject: [PATCH 20/44] changed name to get_gd(), check() passed --- NAMESPACE | 1 + R/build_request.R | 45 ++++++++- R/build_request_v2.R | 40 -------- R/get_cp.R | 3 + R/{get_grouped_stats.R => get_gd.R} | 30 +++--- R/get_stats.R | 21 ++++ R/utils.R | 17 ++++ grouped-stats_body_checks.R | 146 ---------------------------- man/build_request.Rd | 4 +- man/build_request_old.Rd | 23 +++++ man/change_grouped_stats_to_csv.Rd | 17 ++++ man/get_gd.Rd | 44 +++++++++ man/get_stats.Rd | 3 + 13 files changed, 190 insertions(+), 204 deletions(-) delete mode 100644 R/build_request_v2.R create mode 100644 R/get_cp.R rename R/{get_grouped_stats.R => get_gd.R} (75%) delete mode 100644 grouped-stats_body_checks.R create mode 100644 man/build_request_old.Rd create mode 100644 man/change_grouped_stats_to_csv.Rd create mode 100644 man/get_gd.Rd diff --git a/NAMESPACE b/NAMESPACE index 36d4135..1d56d3f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(call_aux) +export(change_grouped_stats_to_csv) export(check_api) export(delete_cache) export(display_aux) diff --git a/R/build_request.R b/R/build_request.R index b148732..6570751 100644 --- a/R/build_request.R +++ b/R/build_request.R @@ -1,4 +1,4 @@ -#' build_request +#' Build request version 2 #' #' @param server character: Server. For WB internal use only #' @param api_version character: API version @@ -12,6 +12,49 @@ build_request <- function(server, endpoint, ...) { + + + base_url <- select_base_url(server = server) + params <- list(...) + + req <- httr2::request(base_url) |> + httr2::req_url_path_append(api_version) |> + httr2::req_url_path_append(endpoint) |> + # .multi = "comma" works fine without applying fix_params + httr2::req_url_query(!!!params, .multi = "comma") |> + httr2::req_cache(tools::R_user_dir("pipr", which = "cache"), + use_on_error = TRUE, + debug = TRUE) |> + httr2::req_user_agent(pipr_user_agent) |> + httr2::req_error(body = parse_error_body) |> + httr2::req_retry( + is_transient = pip_is_transient, + after = retry_after, + max_seconds = 60 + ) + + + return(req) + +} + + + + +#' build_request, OLD version +#' +#' @param server character: Server. For WB internal use only +#' @param api_version character: API version +#' @param endpoint character: PIP API endpoint +#' @param ... other parameters +#' +#' @return httr2 request +#' +build_request_old <- function(server, + api_version, + endpoint, + ...) { + base_url <- select_base_url(server = server) params <- list(...) diff --git a/R/build_request_v2.R b/R/build_request_v2.R deleted file mode 100644 index 61b51f3..0000000 --- a/R/build_request_v2.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Build request version 2 -#' -#' @param server character: Server. For WB internal use only -#' @param api_version character: API version -#' @param endpoint character: PIP API endpoint -#' @param ... -#' -#' @return httr2 request -#' -build_request_v2 <- function(server, - api_version, - endpoint, - ...) { - - - - base_url <- select_base_url(server = server) - params <- list(...) - - req <- httr2::request(base_url) |> - httr2::req_url_path_append(api_version) |> - httr2::req_url_path_append(endpoint) |> - # .multi = "comma" works fine without applying fix_params - httr2::req_url_query(!!!params, .multi = "comma") |> - httr2::req_cache(tools::R_user_dir("pipr", which = "cache"), - use_on_error = TRUE, - debug = TRUE) |> - httr2::req_user_agent(pipr_user_agent) |> - httr2::req_error(body = parse_error_body) |> - httr2::req_retry( - is_transient = pip_is_transient, - after = retry_after, - max_seconds = 60 - ) - - - return(req) - -} - diff --git a/R/get_cp.R b/R/get_cp.R new file mode 100644 index 0000000..b191616 --- /dev/null +++ b/R/get_cp.R @@ -0,0 +1,3 @@ +# get_cp <- function() { +# print("hello") +# } diff --git a/R/get_grouped_stats.R b/R/get_gd.R similarity index 75% rename from R/get_grouped_stats.R rename to R/get_gd.R index 09dc705..6cb2b92 100644 --- a/R/get_grouped_stats.R +++ b/R/get_gd.R @@ -5,22 +5,22 @@ #' @inheritParams get_stats #' @param cum_welfare numeric: Cumulative welfare values. #' @param cum_population numeric: Cumulative population values. -#' @param endpoint character: One of "grouped-stats", "lorenz-curve", "regression-params". +#' @param estimate character: One of "stats", "lorenz", "params". #' @param requested_mean numeric: Requested mean. #' @param povline numeric: Poverty line. Required for endpoint = "grouped-stats". #' @param n_bins numeric: Number of bins. Required for endpoint = "lorenz-curve". -get_grouped_stats <- function(cum_welfare = NULL, - cum_population = NULL, - estimate = c("stats", "lorenz", "params"), # TO-DO: estimate (stats, lorenz, params) - requested_mean = 1, # grouped-stats specific - povline = 1, # grouped-stats specific - #lorenz = NULL, # lorenz-curve specific (not working for now) - n_bins = NULL, # lorenz-curve specific - api_version = "v1", - format = c("rds", "json", "csv"), # TO-DO: arrow does not work. -> use data.table to pivot and return in .rds format. - simplify = TRUE, - server = NULL) { +get_gd <- function(cum_welfare = NULL, + cum_population = NULL, + estimate = c("stats", "lorenz", "params"), # TO-DO: estimate (stats, lorenz, params) + requested_mean = 1, # grouped-stats specific + povline = 1, # grouped-stats specific + #lorenz = NULL, # lorenz-curve specific (not working for now) + n_bins = NULL, # lorenz-curve specific + api_version = "v1", + format = c("rds", "json", "csv"), # TO-DO: arrow does not work. -> use data.table to pivot and return in .rds format. + simplify = TRUE, + server = NULL) { # 0. Match args ------------------------------------------------------------- estimate <- match.arg(estimate) @@ -47,7 +47,7 @@ get_grouped_stats <- function(cum_welfare = NULL, # 1.2 Build request for grouped-stats ------ - req <- build_request_v2( + req <- build_request( cum_welfare = cum_welfare, cum_population = cum_population, requested_mean = requested_mean, @@ -73,7 +73,7 @@ get_grouped_stats <- function(cum_welfare = NULL, endpoint <- "lorenz-curve" # 2.1 Build request for lorenz-curve ------ - req <- build_request_v2( + req <- build_request( cum_welfare = cum_welfare, cum_population = cum_population, #lorenz = lorenz, @@ -99,7 +99,7 @@ get_grouped_stats <- function(cum_welfare = NULL, endpoint <- "regression-params" # 3.2 Build request for regress-params - req <- build_request_v2( + req <- build_request( cum_welfare = cum_welfare, cum_population = cum_population, format = format, diff --git a/R/get_stats.R b/R/get_stats.R index 34319fe..f88f195 100644 --- a/R/get_stats.R +++ b/R/get_stats.R @@ -8,6 +8,7 @@ #' poverty line #' @param fill_gaps logical: If TRUE, will interpolate / extrapolate values for #' missing years +#' @param nowcast logical: If TRUE, will return nowcast estimates. #' @param subgroup character: If used result will be aggregated for predefined #' sub-groups. Either 'wb_regions' or 'none'. #' @param welfare_type character: Welfare type either of c("all", "income", "consumption") @@ -62,6 +63,7 @@ get_stats <- function(country = "all", povline = NULL, popshare = NULL, fill_gaps = FALSE, + nowcast = FALSE, subgroup = NULL, welfare_type = c("all", "income", "consumption"), reporting_level = c("all", "national", "urban", "rural"), @@ -77,11 +79,20 @@ get_stats <- function(country = "all", reporting_level <- match.arg(reporting_level) api_version <- match.arg(api_version) format <- match.arg(format) + # popshare can't be used together with povline if (!is.null(popshare)) povline <- NULL + # nowcast = TRUE -> fill_gaps = TRUE + if (nowcast) fill_gaps <- TRUE + + # otherwise we cannot filter correctly because estimate_type not returned + if (isFALSE(fill_gaps)) nowcast <- FALSE + + # subgroup can't be used together with fill_gaps if (!is.null(subgroup)) { fill_gaps <- NULL # subgroup can't be used together with fill_gaps + nowcast <- NULL # assuming this is the same for nowcast endpoint <- "pip-grp" subgroup <- match.arg(subgroup, c("none", "wb_regions")) if (subgroup == "wb_regions") { @@ -94,6 +105,7 @@ get_stats <- function(country = "all", group_by <- NULL } + # Build query string req <- build_request( country = country, @@ -101,6 +113,7 @@ get_stats <- function(country = "all", povline = povline, popshare = popshare, fill_gaps = fill_gaps, + nowcast = nowcast, group_by = group_by, welfare_type = welfare_type, reporting_level = reporting_level, @@ -112,12 +125,20 @@ get_stats <- function(country = "all", api_version = api_version, endpoint = endpoint ) + # Perform request res <- req |> httr2::req_perform() # Parse result out <- parse_response(res, simplify) + print(out) + + # Filter nowcast + ## (only when simplify == TRUE) because filtering happens after the request is returned. + if ( !is.null(nowcast) & isFALSE(nowcast) & simplify == TRUE) { + out <- out[!grepl("nowcast", out$estimate_type),] + } return(out) } diff --git a/R/utils.R b/R/utils.R index 78de80a..caaa778 100644 --- a/R/utils.R +++ b/R/utils.R @@ -112,6 +112,17 @@ build_args <- function(.country = NULL, #' @keywords internal parse_response <- function(res, simplify) { + # Classify the response type + res_health <- FALSE + if (grepl("health-check", res$url)) { + res_health <- TRUE + } + + pip_info <- FALSE + if (grepl("pip-info", res$url)) { + pip_info <- TRUE + } + # Get response type type <- tryCatch(suppressWarnings(httr2::resp_content_type(res)), error = function(e) NULL) @@ -124,9 +135,15 @@ parse_response <- function(res, simplify) { } if (type == "application/json") { + + if (res_health | pip_info) { + parsed <- jsonlite::fromJSON(httr2::resp_body_string(res, encoding = "UTF-8")) + } else { parsed <- jsonlite::fromJSON(httr2::resp_body_string(res, encoding = "UTF-8")) parsed <- change_grouped_stats_to_csv(parsed) # GC: used to pivot. + } } + if (type == "text/csv") { parsed <- suppressMessages(vroom::vroom( I(httr2::resp_body_string(res, encoding = "UTF-8"))) diff --git a/grouped-stats_body_checks.R b/grouped-stats_body_checks.R deleted file mode 100644 index 8bce547..0000000 --- a/grouped-stats_body_checks.R +++ /dev/null @@ -1,146 +0,0 @@ -# Grouped stats body checks - -# Notes: -# 1. Error when cum_welfare != cum_population is not specific, it just says ! HTTP 404 Not Found. -> Should I block this? -# 2. Error when format = "arrow", when format rds and json I get a tibble with 10 rows, when format = csv I get 1 row (+ cols) (format is set to NULL in pipapi) -# 3. I don't think I can send other parameters to the endpoint, popshare doesn't work, nor any other parameter not declared in the endpoint. -# 4. Note that without mean and povline it fails (so there is not a default.) -# 5. I cannot supply the lorenz fit to the endpoint because in the endpoint it is passed with the other parameters to "as.numeric" and it makes it fail. - -# 1. Body checks ----- -## 1. Args ---- -api_version = "v1" -format = "rds" -simplify = TRUE -server = NULL -endpoint = "grouped-stats" - -cum_welfare = c(0.0002,0.0006,0.0011,0.0021,0.0031,0.0048,0.0066,0.0095,0.0128,0.0177,0.0229,0.0355,0.0513,0.0689,0.0882) -cum_population = c(0.001,0.003,0.005,0.009,0.013,0.019,0.025,0.034,0.044,0.0581,0.0721,0.1041,0.1411,0.1792, 0.3) -requested_mean = 2 -povline = 1.9 -lorenz = "lq" -n_bins = NULL -# popshare = 0.5 -#format = "csv" -#format = "json" -#format = "arrow" - -## 2. Match args ---- -#api_version <- match.arg(api_version) -#format <- match.arg(format) - -## 3. Args checks ---- -if (length(cum_welfare) != length(cum_population)) { - cli::cli_abort("{.val cum_welfare} and {.val cum_population} must have the same length.") -} - - - -## 4.1 Request grouped-stats ---- -endpoint = "grouped-stats" -req_gd <- build_request_v2( - cum_welfare = cum_welfare, - cum_population = cum_population, - requested_mean = requested_mean, - povline = povline, - format = format, - server = server, - api_version = api_version, - endpoint = endpoint - ) - -res <- req_gd |> - httr2::req_perform() - -parse_response(res, simplify = simplify) - -## 4.2 Request lorenz-curve ---- -endpoint = "lorenz-curve" -req_lorenz <- build_request_v2( - cum_welfare = cum_welfare, - cum_population = cum_population, - #lorenz = lorenz, - n_bins = n_bins, - #format = format, - server = server, - api_version = api_version, - endpoint = endpoint -) - - -res <- req_lorenz |> - httr2::req_perform() - -parse_response(res, simplify = simplify) - - -## 4.3 Request regression-params ---- -endpoint = "regression-params" -req_lreg <- build_request_v2( - cum_welfare = cum_welfare, - cum_population = cum_population, - #format = format, - server = server, - api_version = api_version, - endpoint = endpoint -) - - -# 5. Perform request -res <- req_reg |> - httr2::req_perform() - -parse_response(res, simplify = simplify) - - - -# 2. Function checks ----- -cum_welfare = c(0.0002,0.0006,0.0011,0.0021,0.0031,0.0048,0.0066,0.0095,0.0128,0.0177,0.0229,0.0355,0.0513,0.0689,0.0882) -cum_population = c(0.001,0.003,0.005,0.009,0.013,0.019,0.025,0.034,0.044,0.0581,0.0721,0.1041,0.1411,0.1792, 0.3) - - -## grouped_stats ----- -get_grouped_stats(cum_population = cum_population, - cum_welfare = cum_welfare, - povline = 1.9, - requested_mean = 2.1, - endpoint = "grouped-stats", - format = "csv") - -### errors ---- -get_grouped_stats(cum_population = cum_population, - cum_welfare = cum_welfare, - povline = NULL, - requested_mean = 2.1, - endpoint = "grouped-stats") - -get_grouped_stats(cum_population = cum_population, - cum_welfare = cum_welfare, - povline = 1.9, - requested_mean = NULL, - endpoint = "grouped-stats") - -cum_population = c(0.001,0.003,0.005,0.009,0.013,0.019,0.025,0.034,0.044,0.0581,0.0721,0.1041,0.1411,0.1792) -get_grouped_stats(cum_population = cum_population, - cum_welfare = cum_welfare, - povline = 1.9, - requested_mean = 2.1, - endpoint = "grouped-stats") - - -## lorenz-curve ---- -get_grouped_stats(cum_population = cum_population, - cum_welfare = cum_welfare, - n_bins = 13, - endpoint = "lorenz-curve") - - - - -## regress-params ----- -get_grouped_stats(cum_population = cum_population, - cum_welfare = cum_welfare, - endpoint = "regression-params") - - diff --git a/man/build_request.Rd b/man/build_request.Rd index 96c72d2..5d6b647 100644 --- a/man/build_request.Rd +++ b/man/build_request.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/build_request.R \name{build_request} \alias{build_request} -\title{build_request} +\title{Build request version 2} \usage{ build_request(server, api_version, endpoint, ...) } @@ -19,5 +19,5 @@ build_request(server, api_version, endpoint, ...) httr2 request } \description{ -build_request +Build request version 2 } diff --git a/man/build_request_old.Rd b/man/build_request_old.Rd new file mode 100644 index 0000000..75d3f02 --- /dev/null +++ b/man/build_request_old.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_request.R +\name{build_request_old} +\alias{build_request_old} +\title{build_request, OLD version} +\usage{ +build_request_old(server, api_version, endpoint, ...) +} +\arguments{ +\item{server}{character: Server. For WB internal use only} + +\item{api_version}{character: API version} + +\item{endpoint}{character: PIP API endpoint} + +\item{...}{other parameters} +} +\value{ +httr2 request +} +\description{ +build_request, OLD version +} diff --git a/man/change_grouped_stats_to_csv.Rd b/man/change_grouped_stats_to_csv.Rd new file mode 100644 index 0000000..353b439 --- /dev/null +++ b/man/change_grouped_stats_to_csv.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{change_grouped_stats_to_csv} +\alias{change_grouped_stats_to_csv} +\title{Change the list-output to dataframe (Function from pipapi)} +\usage{ +change_grouped_stats_to_csv(out) +} +\arguments{ +\item{out}{output from wbpip::gd_compute_pip_stats} +} +\value{ +dataframe +} +\description{ +Change the list-output to dataframe (Function from pipapi) +} diff --git a/man/get_gd.Rd b/man/get_gd.Rd new file mode 100644 index 0000000..cfdef04 --- /dev/null +++ b/man/get_gd.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_gd.R +\name{get_gd} +\alias{get_gd} +\title{Get grouped stats} +\usage{ +get_gd( + cum_welfare = NULL, + cum_population = NULL, + estimate = c("stats", "lorenz", "params"), + requested_mean = 1, + povline = 1, + n_bins = NULL, + api_version = "v1", + format = c("rds", "json", "csv"), + simplify = TRUE, + server = NULL +) +} +\arguments{ +\item{cum_welfare}{numeric: Cumulative welfare values.} + +\item{cum_population}{numeric: Cumulative population values.} + +\item{estimate}{character: One of "stats", "lorenz", "params".} + +\item{requested_mean}{numeric: Requested mean.} + +\item{povline}{numeric: Poverty line. Required for endpoint = "grouped-stats".} + +\item{n_bins}{numeric: Number of bins. Required for endpoint = "lorenz-curve".} + +\item{api_version}{character: API version} + +\item{format}{character: Response format either of c("rds", "json", "csv")} + +\item{simplify}{logical: If TRUE (the default) the response is returned as a +\code{tibble}} + +\item{server}{character: Server. For WB internal use only} +} +\description{ +Get grouped stats from the PIP API. +} diff --git a/man/get_stats.Rd b/man/get_stats.Rd index 9752fbc..daf1df2 100644 --- a/man/get_stats.Rd +++ b/man/get_stats.Rd @@ -11,6 +11,7 @@ get_stats( povline = NULL, popshare = NULL, fill_gaps = FALSE, + nowcast = FALSE, subgroup = NULL, welfare_type = c("all", "income", "consumption"), reporting_level = c("all", "national", "urban", "rural"), @@ -49,6 +50,8 @@ poverty line} \item{fill_gaps}{logical: If TRUE, will interpolate / extrapolate values for missing years} +\item{nowcast}{logical: If TRUE, will return nowcast estimates.} + \item{subgroup}{character: If used result will be aggregated for predefined sub-groups. Either 'wb_regions' or 'none'.} From c2199856604dd1cb0f8af04be5f209fc51e07bed Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Mon, 7 Oct 2024 15:12:31 -0400 Subject: [PATCH 21/44] some typos corrected --- R/get_stats.R | 1 - R/utils.R | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/R/get_stats.R b/R/get_stats.R index f88f195..afde51e 100644 --- a/R/get_stats.R +++ b/R/get_stats.R @@ -132,7 +132,6 @@ get_stats <- function(country = "all", # Parse result out <- parse_response(res, simplify) - print(out) # Filter nowcast ## (only when simplify == TRUE) because filtering happens after the request is returned. diff --git a/R/utils.R b/R/utils.R index caaa778..670d5ad 100644 --- a/R/utils.R +++ b/R/utils.R @@ -112,7 +112,7 @@ build_args <- function(.country = NULL, #' @keywords internal parse_response <- function(res, simplify) { - # Classify the response type + # Classify the response url res_health <- FALSE if (grepl("health-check", res$url)) { res_health <- TRUE From 086df66ad279bdbdc732cc0a3a2cbddbb48716f5 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Mon, 7 Oct 2024 16:43:32 -0400 Subject: [PATCH 22/44] popshare added back to grouped-stats --- R/get_cp.R | 41 ++++++++++++++++++++++++++++++++++++++--- R/get_gd.R | 4 ++++ man/get_gd.Rd | 4 ++++ 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/R/get_cp.R b/R/get_cp.R index b191616..aa1cb4c 100644 --- a/R/get_cp.R +++ b/R/get_cp.R @@ -1,3 +1,38 @@ -# get_cp <- function() { -# print("hello") -# } + + + + +get_cp <- function(country = "all", + povline = NULL, + ppp_version = NULL, + api_version = "v1", + format = c("rds", "json", "csv"), + simplify = TRUE, + server = NULL) { + + + # Match args + api_version <- match.arg(api_version) + format <- match.arg(format) + + # Build query string + req <- build_request( + country = country, + povline = povline, + ppp_version = ppp_version, + format = format, + server = server, + api_version = api_version, + endpoint = "cp-download" + ) + + # Perform request + res <- req |> + httr2::req_perform() + + # Parse result + out <- parse_response(res, simplify) + + return(out) + +} diff --git a/R/get_gd.R b/R/get_gd.R index 6cb2b92..b7e98cc 100644 --- a/R/get_gd.R +++ b/R/get_gd.R @@ -15,6 +15,7 @@ get_gd <- function(cum_welfare = NULL, estimate = c("stats", "lorenz", "params"), # TO-DO: estimate (stats, lorenz, params) requested_mean = 1, # grouped-stats specific povline = 1, # grouped-stats specific + popshare = NULL, #lorenz = NULL, # lorenz-curve specific (not working for now) n_bins = NULL, # lorenz-curve specific api_version = "v1", @@ -45,6 +46,8 @@ get_gd <- function(cum_welfare = NULL, cli::cli_abort("For endpoint {endpoint}, {.val povline} must be provided.") } + # popshare can't be used together with povline + if (!is.null(popshare)) povline <- NULL # 1.2 Build request for grouped-stats ------ req <- build_request( @@ -52,6 +55,7 @@ get_gd <- function(cum_welfare = NULL, cum_population = cum_population, requested_mean = requested_mean, povline = povline, + popshare = popshare, format = format, server = server, api_version = api_version, diff --git a/man/get_gd.Rd b/man/get_gd.Rd index cfdef04..dd7e5ad 100644 --- a/man/get_gd.Rd +++ b/man/get_gd.Rd @@ -10,6 +10,7 @@ get_gd( estimate = c("stats", "lorenz", "params"), requested_mean = 1, povline = 1, + popshare = NULL, n_bins = NULL, api_version = "v1", format = c("rds", "json", "csv"), @@ -28,6 +29,9 @@ get_gd( \item{povline}{numeric: Poverty line. Required for endpoint = "grouped-stats".} +\item{popshare}{numeric: Proportion of the population living below the +poverty line} + \item{n_bins}{numeric: Number of bins. Required for endpoint = "lorenz-curve".} \item{api_version}{character: API version} From e061e189969262d759d5e5dda05f078bc6eb7d44 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Wed, 9 Oct 2024 13:53:20 -0400 Subject: [PATCH 23/44] final get_cp() draft, no tests --- NAMESPACE | 1 + R/get_cp.R | 49 +++++++++++++++++++++++++++++++++++-------- man/get_cp.Rd | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 9 deletions(-) create mode 100644 man/get_cp.Rd diff --git a/NAMESPACE b/NAMESPACE index 1d56d3f..ddc6ba4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(delete_cache) export(display_aux) export(get_aux) export(get_cache_info) +export(get_cp) export(get_pip_info) export(get_stats) export(get_versions) diff --git a/R/get_cp.R b/R/get_cp.R index aa1cb4c..600802e 100644 --- a/R/get_cp.R +++ b/R/get_cp.R @@ -1,36 +1,67 @@ - - - - +#' Get Country Profiles +#' +#' @inheritParams get_stats +#' +#' @return If `simplify = FALSE`, it returns a list of class "pip_api". If +#' `simplify = TRUE`, it returns a tibble with the requested data. This is the +#' default. Only for `get_aux()`, If `assign_tb = TRUE` or character, it +#' returns TRUE when data was assign properly to .pip env. FALSE, if it was +#' not assigned. +#' @export +#' +#' @examples +#' \dontrun{ +#' # One country-year +#' res <- get_cp(country = "AGO") +#' +#' # All countries and years +#' res <- get_cp() +#' } get_cp <- function(country = "all", povline = NULL, + version = NULL, ppp_version = NULL, + release_version = NULL, api_version = "v1", - format = c("rds", "json", "csv"), + format = c("arrow", "rds", "json", "csv"), simplify = TRUE, server = NULL) { - # Match args + # 0. Match args ---- api_version <- match.arg(api_version) format <- match.arg(format) - # Build query string + # 1. povline set-up ---- + # (GC: stata equivalent but no 2005 and default to 2.15) + if (is.null(povline)) { + if (ppp_version == "2011") { + povline <- 1.9 + } else { + povline <- 2.15 + } + } + + + # 2. Build query string ---- req <- build_request( country = country, povline = povline, + version = version, ppp_version = ppp_version, + release_version = release_version, format = format, server = server, api_version = api_version, endpoint = "cp-download" ) - # Perform request + + # 3. Perform request ---- res <- req |> httr2::req_perform() - # Parse result + # 4. Parse result and return out <- parse_response(res, simplify) return(out) diff --git a/man/get_cp.Rd b/man/get_cp.Rd new file mode 100644 index 0000000..599b315 --- /dev/null +++ b/man/get_cp.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_cp.R +\name{get_cp} +\alias{get_cp} +\title{Get Country Profiles} +\usage{ +get_cp( + country = "all", + povline = NULL, + version = NULL, + ppp_version = NULL, + release_version = NULL, + api_version = "v1", + format = c("arrow", "rds", "json", "csv"), + simplify = TRUE, + server = NULL +) +} +\arguments{ +\item{country}{character: A vector with one or more \href{https://wits.worldbank.org/wits/wits/witshelp/content/codes/country_codes.htm}{country ISO 3 codes} or +'all'} + +\item{povline}{numeric: Poverty line} + +\item{version}{character: Data version. See \code{get_versions()}} + +\item{ppp_version}{ppp year to be used} + +\item{release_version}{date when the data was published in YYYYMMDD format} + +\item{api_version}{character: API version} + +\item{format}{character: Response format either of c("rds", "json", "csv")} + +\item{simplify}{logical: If TRUE (the default) the response is returned as a +\code{tibble}} + +\item{server}{character: Server. For WB internal use only} +} +\value{ +If \code{simplify = FALSE}, it returns a list of class "pip_api". If +\code{simplify = TRUE}, it returns a tibble with the requested data. This is the +default. Only for \code{get_aux()}, If \code{assign_tb = TRUE} or character, it +returns TRUE when data was assign properly to .pip env. FALSE, if it was +not assigned. +} +\description{ +Get Country Profiles +} +\examples{ +\dontrun{ +# One country-year +res <- get_cp(country = "AGO") + +# All countries and years +res <- get_cp() +} +} From 558c85e8ab3eec44c9a59fcc5d65052dfd6e381c Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Mon, 4 Nov 2024 05:02:20 -0500 Subject: [PATCH 24/44] draft get-gd --- vignettes/articles/get-gd-example.Rmd | 66 +++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 vignettes/articles/get-gd-example.Rmd diff --git a/vignettes/articles/get-gd-example.Rmd b/vignettes/articles/get-gd-example.Rmd new file mode 100644 index 0000000..4a64bf3 --- /dev/null +++ b/vignettes/articles/get-gd-example.Rmd @@ -0,0 +1,66 @@ +--- +title: "get-gd-example" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{pipr} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true") +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + purl = NOT_CRAN +) +``` + +```{r setup} +library(pipr) +``` + +The `get_gd()` function allows users to interact with the PIP API to retrieve grouped statistics based on welfare and population data. Depending on the chosen `estimate` parameter, it can retrieve: + +- grouped statistics (`estimate = "stats"`). +- Lorenz curve data (`estimate = "lorenz"`). +- regression parameters (`estimate = "params"`). + +Here are a few examples to get you started: + +## Using default options + +By default, `get_gd()` returns grouped statistics (`estimate = "stats"`) based on cumulative welfare (`cum_welfare`) and population values (`cum_population`), both expressed as percentages. The default mean (`requested_mean`) and poverty line (`povline`) are set to 1. + +```{r} +cum_welfare <- c(0.1, 0.3, 0.6, 1.0) +cum_population <- c(0.2, 0.5, 0.8, 1.0) +get_gd(cum_welfare = cum_welfare, cum_population = cum_population) +``` + +## Basic Options + +### Retrieve grouped statistics + +To retrieve basic grouped statistics, you need to provide cumulative welfare and population values along with the requested mean and poverty line. +```{r} +get_gd( + cum_welfare = cum_welfare, + cum_population = cum_population, + estimate = "stats", + requested_mean = 3, + povline = 2.15 +) +``` + +### Retrieve Lorenz curve data +To retrieve Lorenz curve data, you can specify `estimate = "lorenz"` and provide the number of bins (`n_bins`) (There is no default value for `n_bins`). +```{r} +get_gd( + cum_welfare = cum_welfare, + cum_population = cum_population, + estimate = "lorenz", + n_bins = 10 +) +``` + From 60da6d749a27930fad3c05680373701ac303cade Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Fri, 8 Nov 2024 04:11:20 -0500 Subject: [PATCH 25/44] examples added to get_gd and lorenz parameter added back, not tested, waiting for pipapi to be updated --- R/get_gd.R | 65 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/R/get_gd.R b/R/get_gd.R index b7e98cc..d29a71f 100644 --- a/R/get_gd.R +++ b/R/get_gd.R @@ -3,23 +3,64 @@ #' #' Get grouped stats from the PIP API. #' @inheritParams get_stats -#' @param cum_welfare numeric: Cumulative welfare values. -#' @param cum_population numeric: Cumulative population values. +#' @param cum_welfare numeric: Cumulative welfare values, expressed in shares. Any length. They should be monotonically increasing, and sum to 1. +#' @param cum_population numeric: Cumulative population values, expressed in shares. Any length. They should be monotonically increasing, and sum to 1. #' @param estimate character: One of "stats", "lorenz", "params". #' @param requested_mean numeric: Requested mean. -#' @param povline numeric: Poverty line. Required for endpoint = "grouped-stats". -#' @param n_bins numeric: Number of bins. Required for endpoint = "lorenz-curve". +#' @param povline numeric: Poverty line. Required for estimate = "stats". +#' @param n_bins numeric: Number of bins. Required for estimate = "lorenz". +#' +#' @examples +#' \dontrun{ +#' +#' datt_data <- data.frame(p = c(0.0092, 0.0339, 0.0850, 0.160, 0.2609, 0.4133, 0.5497, 0.7196, 0.8196, 0.9174, 0.9570, 0.9751, 1), +#' L = c(0.00208, 0.001013, 0.03122, 0.07083, 0.12808, 0.23498, 0.34887, 051994, 0.64270, 0.79201, 0.86966, 0.91277, 1) +#' +#' # estimate = 'stats': retrieve poverty statistics. +#' stats <- get_gd(cum_welfare = datt_data$L, cum_population = datt_data$p, +#' estimate = "stats", +#' requested_mean = 19, # default is 1. +#' povline = 2.15) # default is 1. +#' +#' # estimate = 'lorenz': retrieve Lorenz curve data points for a specified number of bins. +#' +#' ## Best lorenz curve methodolody selected by default: +#' lorenz <- get_gd(cum_welfare = datt_data$L, +#' cum_population = datt_data$p, +#' estimate = "lorenz", +#' n_bins = 100) # must be specified, default is NULL. +#' +#' ## Specify lorenz curve methodology: +#' ### Beta Lorenz ("lb") +#' lorenz_lb <- get_gd(cum_welfare = datt_data$L, +#' cum_population = datt_data$p, +#' estimate = "lorenz", +#' lorenz = "lb", +#' n_bins = 100) +#' +#' ### Quadratic Lorenz ("lq") +#' lorenz_lq <- get_gd(cum_welfare = datt_data$L, +#' cum_population = datt_data$p, +#' estimate = "lorenz", +#' lorenz = "lq", +#' n_bins = 100) +#' +#' # estimate = 'params': retrieve regression parameters used for the lorenz curve estimation. +#' params <- get_gd(cum_welfare = datt_data$L, +#' cum_population = datt_data$p, +#' estimate = "params") +#' get_gd <- function(cum_welfare = NULL, cum_population = NULL, - estimate = c("stats", "lorenz", "params"), # TO-DO: estimate (stats, lorenz, params) - requested_mean = 1, # grouped-stats specific - povline = 1, # grouped-stats specific - popshare = NULL, - #lorenz = NULL, # lorenz-curve specific (not working for now) - n_bins = NULL, # lorenz-curve specific + estimate = c("stats", "lorenz", "params"), + requested_mean = 1, # stats specific. + povline = 1, # stats specific. + popshare = NULL, # stats specific. + lorenz = NULL, # lorenz specific. + n_bins = NULL, # lorenz specific. api_version = "v1", - format = c("rds", "json", "csv"), # TO-DO: arrow does not work. -> use data.table to pivot and return in .rds format. + format = c("rds", "json", "csv"), simplify = TRUE, server = NULL) { @@ -80,7 +121,7 @@ get_gd <- function(cum_welfare = NULL, req <- build_request( cum_welfare = cum_welfare, cum_population = cum_population, - #lorenz = lorenz, + lorenz = lorenz, n_bins = n_bins, format = format, server = server, From e0b92d0a228fbe1fc81f42f181aa01e8e36decc9 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Fri, 8 Nov 2024 05:37:18 -0500 Subject: [PATCH 26/44] get_gd() vignette draft (not tested as api not working yet but should work fine). --- vignettes/articles/get-gd-example.Rmd | 92 ++++++++++++++++++++------- 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/vignettes/articles/get-gd-example.Rmd b/vignettes/articles/get-gd-example.Rmd index 4a64bf3..e1ab5fd 100644 --- a/vignettes/articles/get-gd-example.Rmd +++ b/vignettes/articles/get-gd-example.Rmd @@ -20,47 +20,95 @@ knitr::opts_chunk$set( library(pipr) ``` -The `get_gd()` function allows users to interact with the PIP API to retrieve grouped statistics based on welfare and population data. Depending on the chosen `estimate` parameter, it can retrieve: +The `get_gd()` function allows users to interact with the PIP API to retrieve grouped data statistics based on cumulative welfare and population data. Depending on the chosen `estimate` parameter, it can retrieve: -- grouped statistics (`estimate = "stats"`). -- Lorenz curve data (`estimate = "lorenz"`). -- regression parameters (`estimate = "params"`). +- poverty and inequality statistics (`estimate = "stats"`). +- Lorenz curve data points (`estimate = "lorenz"`). +- regression parameters used of the Lorenz curve estimation (`estimate = "params"`). Here are a few examples to get you started: -## Using default options +## Retrieve poverty and inequality statistics for grouped data -By default, `get_gd()` returns grouped statistics (`estimate = "stats"`) based on cumulative welfare (`cum_welfare`) and population values (`cum_population`), both expressed as percentages. The default mean (`requested_mean`) and poverty line (`povline`) are set to 1. +By default, `get_gd()` returns grouped statistics (`estimate = "stats"`) based on cumulative welfare (`cum_welfare`) and population values (`cum_population`), both expressed as percentages. The default mean (`requested_mean`) and poverty line (`povline`) are set to 1, so the user should specify the known mean of the distribution, and the desired poverty line. -```{r} -cum_welfare <- c(0.1, 0.3, 0.6, 1.0) -cum_population <- c(0.2, 0.5, 0.8, 1.0) -get_gd(cum_welfare = cum_welfare, cum_population = cum_population) -``` +The data used in this example is from Datt (1998). The dataset lists the cumulative welfare and population values for rural India in 1983 expressed in shares (percentages). The mean of the distribution is 109.9 Rs (daily), and the poverty line at the time was 89 Rs. Note that the cumulative welfare and population values should be monotonically increasing and sum to 1 to be valid. Additionally, the cumulative population values should always be greater or equal to the corresponding welfare values. -## Basic Options +```{r} +datt_data <- data.frame(p = c(0.0092, 0.0339, 0.0850, 0.160, 0.2609, 0.4133, 0.5497, 0.7196, 0.8196, 0.9174, 0.9570, 0.9751, 1), + L = c(0.00208, 0.001013, 0.03122, 0.07083, 0.12808, 0.23498, 0.34887, 0.51994, 0.64270, 0.79201, 0.86966, 0.91277, 1)) +datt_mean <- 109.9 -### Retrieve grouped statistics +datt_povline <- 89 +``` To retrieve basic grouped statistics, you need to provide cumulative welfare and population values along with the requested mean and poverty line. ```{r} get_gd( - cum_welfare = cum_welfare, - cum_population = cum_population, + cum_welfare = datt_data$L, + cum_population = datt_data$p, + estimate = "stats", + requested_mean = datt_mean, + povline = datt_povline + ) +``` + +As an alternative, instead of the mean, you can provide the population share (`popshare`), which will be assumed equal to the poverty headcount ratio, and used to calculate the rest of the statistics (and the poverty line itself): +```{r} +get_gd( + cum_welfare = datt_data$L, + cum_population = datt_data$p, estimate = "stats", - requested_mean = 3, - povline = 2.15 + requested_mean = datt_mean, + popshare = 0.3 + ) + +``` + + +## Retrieve Lorenz curve data + +To retrieve Lorenz curve data, you can specify `estimate = "lorenz"` and provide the number of bins (`n_bins`) to return (there is no default value for `n_bins`). The Lorenz curve will be estimated with both the Beta Lorenz and Quadratic Lorenz methodologies, then the best one will be selected by default. + +```{r} +get_gd( + cum_welfare = datt_data$L, + cum_population = dat_data$p, + estimate = "lorenz", + n_bins = 100 ) ``` -### Retrieve Lorenz curve data -To retrieve Lorenz curve data, you can specify `estimate = "lorenz"` and provide the number of bins (`n_bins`) (There is no default value for `n_bins`). +You can also specify the Lorenz curve methodology by setting the `lorenz` parameter to either "lb" (Beta Lorenz) or "lq" (Quadratic Lorenz). + ```{r} get_gd( - cum_welfare = cum_welfare, - cum_population = cum_population, + cum_welfare = datt_data$L, + cum_population = dat_data$p, estimate = "lorenz", - n_bins = 10 + lorenz = "lb", + n_bins = 100 +) +``` + + +## Retrieve regression parameters + +Finally you can retrieve the regression parameters used for the Lorenz curve estimation by setting `estimate = "params"`. The methods used, both the Beta Lorenz and the Quadratic Lorenz, are described in detail in Datt (1998). + +```{r} +get_gd( + cum_welfare = datt_data$L, + cum_population = dat_data$p, + estimate = "params" ) ``` +The variable `selected_for_dist` shows the Lorenz curve methodology selected by default when calculating the Lorenz curve data points and the deciles. The variable `selected_for_pov` shows the Lorenz curve methodology selected by default when calculating the poverty and inequality statistics. + +## References + +- Datt, Gaurav (1998). "Computational tools for poverty measurement and analysis." FCND Discussion Paper 50. International Food Policy Research Institute (IFPRI). Washington, DC. [Link](https://www.ifpri.org/publication/computational-tools-poverty-measurement-and-analysis) + + + From 77c3b3c4c7265b748277c1c287777219cdb8eceb Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Fri, 8 Nov 2024 05:44:37 -0500 Subject: [PATCH 27/44] cp actually needs a default value --- R/get_cp.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_cp.R b/R/get_cp.R index 600802e..614d6ef 100644 --- a/R/get_cp.R +++ b/R/get_cp.R @@ -20,7 +20,7 @@ get_cp <- function(country = "all", povline = NULL, version = NULL, - ppp_version = NULL, + ppp_version = 2017, # we need to give a default value release_version = NULL, api_version = "v1", format = c("arrow", "rds", "json", "csv"), From c918bbf459a2bb42c2031825f0ec6182eb03eef8 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Mon, 11 Nov 2024 15:55:32 -0500 Subject: [PATCH 28/44] use cases added to get-gd draft --- vignettes/articles/get-gd-example.Rmd | 89 ++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 9 deletions(-) diff --git a/vignettes/articles/get-gd-example.Rmd b/vignettes/articles/get-gd-example.Rmd index e1ab5fd..52523b8 100644 --- a/vignettes/articles/get-gd-example.Rmd +++ b/vignettes/articles/get-gd-example.Rmd @@ -7,7 +7,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r, include = FALSE} +```{r, include = FALSE, message=FALSE, warning=FALSE} NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true") knitr::opts_chunk$set( collapse = TRUE, @@ -16,8 +16,9 @@ knitr::opts_chunk$set( ) ``` -```{r setup} +```{r setup, message=FALSE, warning=FALSE} library(pipr) +library(ggplot2) ``` The `get_gd()` function allows users to interact with the PIP API to retrieve grouped data statistics based on cumulative welfare and population data. Depending on the chosen `estimate` parameter, it can retrieve: @@ -43,7 +44,7 @@ datt_povline <- 89 ``` To retrieve basic grouped statistics, you need to provide cumulative welfare and population values along with the requested mean and poverty line. -```{r} +```{r, warning=FALSE, message=FALSE, eval=FALSE} get_gd( cum_welfare = datt_data$L, cum_population = datt_data$p, @@ -54,7 +55,7 @@ get_gd( ``` As an alternative, instead of the mean, you can provide the population share (`popshare`), which will be assumed equal to the poverty headcount ratio, and used to calculate the rest of the statistics (and the poverty line itself): -```{r} +```{r, warning=FALSE, message=FALSE, eval=FALSE} get_gd( cum_welfare = datt_data$L, cum_population = datt_data$p, @@ -70,7 +71,7 @@ get_gd( To retrieve Lorenz curve data, you can specify `estimate = "lorenz"` and provide the number of bins (`n_bins`) to return (there is no default value for `n_bins`). The Lorenz curve will be estimated with both the Beta Lorenz and Quadratic Lorenz methodologies, then the best one will be selected by default. -```{r} +```{r, warning=FALSE, message=FALSE, eval=FALSE} get_gd( cum_welfare = datt_data$L, cum_population = dat_data$p, @@ -79,9 +80,9 @@ get_gd( ) ``` -You can also specify the Lorenz curve methodology by setting the `lorenz` parameter to either "lb" (Beta Lorenz) or "lq" (Quadratic Lorenz). +You can also specify the Lorenz curve methodology by setting the `lorenz` parameter to either `"lb"` (Beta Lorenz) or `"lq"` (Quadratic Lorenz). -```{r} +```{r, warning=FALSE, message=FALSE, eval=FALSE} get_gd( cum_welfare = datt_data$L, cum_population = dat_data$p, @@ -94,9 +95,9 @@ get_gd( ## Retrieve regression parameters -Finally you can retrieve the regression parameters used for the Lorenz curve estimation by setting `estimate = "params"`. The methods used, both the Beta Lorenz and the Quadratic Lorenz, are described in detail in Datt (1998). +Finally, you can retrieve the regression parameters used for the Lorenz curve estimation by setting `estimate = "params"`. The methods used, both the Beta Lorenz and the Quadratic Lorenz, are described in detail in Datt (1998). -```{r} +```{r, warning=FALSE, message=FALSE, eval=FALSE} get_gd( cum_welfare = datt_data$L, cum_population = dat_data$p, @@ -106,6 +107,76 @@ get_gd( The variable `selected_for_dist` shows the Lorenz curve methodology selected by default when calculating the Lorenz curve data points and the deciles. The variable `selected_for_pov` shows the Lorenz curve methodology selected by default when calculating the poverty and inequality statistics. +## Use cases + +### Poverty and Inequality Statistics +```{r, warning=FALSE, message=FALSE, eval=FALSE} +datt_mean <- 109.9 + +datt_povline <- 89 + +# Define a sequence of poverty lines +poverty_lines <- seq(50, 150, by = 5) + +# Initialize a data frame to store results +poverty_stats <- data.frame() + +# Loop over poverty lines to compute poverty measures +for (pl in poverty_lines) { + stats <- get_gd( + cum_welfare = datt_data$L, + cum_population = datt_data$p, + estimate = "stats", + requested_mean = 109.9, + povline = pl + ) + poverty_stats <- rbind(poverty_stats, data.frame(poverty_line = pl, headcount = stats$headcount, poverty_gap = stats$poverty_gap, poverty_severity = stats$poverty_severity)) +} + +# Plotting +ggplot(poverty_stats, aes(x = poverty_line)) + + geom_line(aes(y = headcount, color = "Headcount Ratio")) + + geom_line(aes(y = poverty_gap, color = "Poverty Gap")) + + geom_line(aes(y = poverty_severity, color = "Poverty Severity")) + + geom_vline(xintercept = datt_povline, linetype = "dashed", color = "black") + + geom_text(aes(x = datt_povline, y = 0.8, label = "Official poverty line (89 Rs)"), hjust = 1.1, vjust = 0) + + geom_vline(xintercept = datt_mean, color = "black") + + geom_text(aes(x = datt_mean, y = 0.8, label = "Mean (109.9 Rs)"), hjust = -.1) + + labs( + title = "Poverty Measures vs. Poverty Line, Rural India 1983", + x = "Poverty Line", + y = "Measure Value", + color = "Poverty Measure" + ) + + theme_minimal() + +``` + +### Lorenz Curve Data +```{r, warning=FALSE, message=FALSE, eval=FALSE} +lorenz_points_lq <- pipgd_lorenz_curve(datt_data$L, datt_data$p, lorenz = "lq") +lorenz_points_lq_10 <- pipgd_lorenz_curve(datt_data$L, datt_data$p, lorenz = "lq", n_bins = 10) + +lorenz_df_lq_100 <- data.frame(cum_welfare = lorenz_points_lq$lorenz_curve$output, + cum_population = lorenz_points_lq$lorenz_curve$points) + +lorenz_df_lq_10 <- data.frame(cum_welfare = lorenz_points_lq_10$lorenz_curve$output, + cum_population = lorenz_points_lq_10$lorenz_curve$points) + +ggplot() + + geom_bar(data = lorenz_df_lq_10, aes(x = cum_population, y = cum_welfare), stat = "identity", fill = "blue", alpha = 0.3) + + geom_bar(data = datt_data, aes(x = p, y = L), stat = "identity", fill = "blue") + + geom_point(data = lorenz_df_lq_100, aes(x = cum_population, y = cum_welfare), color = "blue", size = 0.5) + + geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") + + labs( + title = "Lorenz Curve for Rural India, 1983", + x = "Cumulative Share of Population", + y = "Cumulative Share of Welfare" + ) + + theme_minimal() +``` + + ## References - Datt, Gaurav (1998). "Computational tools for poverty measurement and analysis." FCND Discussion Paper 50. International Food Policy Research Institute (IFPRI). Washington, DC. [Link](https://www.ifpri.org/publication/computational-tools-poverty-measurement-and-analysis) From e6d6a3e63cb52c93097d8387d33f85a8246eae22 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Mon, 11 Nov 2024 16:05:16 -0500 Subject: [PATCH 29/44] typo corrected in get_gd, get_cp examples changes and default measures changed too. --- R/get_cp.R | 11 +++++++---- R/get_gd.R | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/get_cp.R b/R/get_cp.R index 614d6ef..2b8084d 100644 --- a/R/get_cp.R +++ b/R/get_cp.R @@ -11,16 +11,19 @@ #' #' @examples #' \dontrun{ -#' # One country-year +#' # One country, all years with default ppp_version = 2017 #' res <- get_cp(country = "AGO") #' -#' # All countries and years +#' # All countries, povline = 1.9 +#' res <- get_cp(povline = 1.9) +#' +#' # All countries and years with default values #' res <- get_cp() #' } get_cp <- function(country = "all", - povline = NULL, + povline = 2.15, # GC: default value like Stata version = NULL, - ppp_version = 2017, # we need to give a default value + ppp_version = 2017, # GC: default value like Stata release_version = NULL, api_version = "v1", format = c("arrow", "rds", "json", "csv"), diff --git a/R/get_gd.R b/R/get_gd.R index d29a71f..f772047 100644 --- a/R/get_gd.R +++ b/R/get_gd.R @@ -49,7 +49,7 @@ #' params <- get_gd(cum_welfare = datt_data$L, #' cum_population = datt_data$p, #' estimate = "params") -#' +#'} get_gd <- function(cum_welfare = NULL, cum_population = NULL, From 5acca6d2803f03672e12ffb8902e2a004925dace Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Fri, 15 Nov 2024 11:47:06 -0500 Subject: [PATCH 30/44] new datasets to test gd data, gd data vignette --- data-raw/datt.R | 44 ++++++++++++ data/datt_rural.rda | Bin 0 -> 794 bytes data/datt_urban.rda | Bin 0 -> 827 bytes vignettes/articles/get-gd-example.Rmd | 94 +++++++++++++++----------- 4 files changed, 99 insertions(+), 39 deletions(-) create mode 100644 data-raw/datt.R create mode 100644 data/datt_rural.rda create mode 100644 data/datt_urban.rda diff --git a/data-raw/datt.R b/data-raw/datt.R new file mode 100644 index 0000000..053a752 --- /dev/null +++ b/data-raw/datt.R @@ -0,0 +1,44 @@ +# Datt Data procedure ---- +# Data is from Sarveskhana, Vol IX n 4 (April 1986), same dataset used by Datt (1998). +# Function to calculate cumulative shares (p and L) +calculate_cumulative_shares <- function(data, area) { + # Step 1: Calculate cumulative population share (p) + data$p <- cumsum(data$percentage_of_persons) / sum(data$percentage_of_persons) + + # Step 2: Calculate class consumption + data$class_consumption <- data$mean_monthly_pc_exp * (data$percentage_of_persons / 100) + + # Step 3: Calculate cumulative welfare share (L) + total_consumption <- sum(data$class_consumption) + data$L <- cumsum(data$class_consumption) / total_consumption + + # Add area identifier + data$area <- area + + # Return the data frame with p, L, and area columns + return(data[, setdiff(names(data), "class_consumption")]) +} + +# Rural data setup +rural_data <- data.frame( + monthly_pc_exp = c("0 – 30", "30 – 40", "40 – 50", "50 – 60", "60 – 75", "75 – 85", "85 – 100", + "100 – 125", "125 – 150", "150 – 200", "200 – 250", "250 – 300", "300 & above"), + mean_monthly_pc_exp = c(24.4, 35.8, 45.36, 55.10, 64.92, 77.08, 91.75, 110.64, 134.9, 167.76, 215.48, 261.66, 384.97), + percentage_of_persons = c(0.92, 2.47, 5.11, 7.90, 9.69, 15.24, 13.64, 16.99, 10.00, 9.78, 3.96, 1.81, 2.49) +) + +# Urban data setup +urban_data <- data.frame( + monthly_pc_exp = c("0 – 30", "30 – 40", "40 – 50", "50 – 60", "60 – 75", "75 – 85", "85 – 100", + "100 – 125", "125 – 150", "150 – 200", "200 – 250", "250 – 300", "300 & above"), + mean_monthly_pc_exp = c(21.87, 35.8, 45.63, 55.46, 65.13, 77.2, 92.26, 111.41, 136.27, 170.13, 219.76, 267.33, 424.6), + percentage_of_persons = c(0.21, 0.40, 1.01, 1.40, 3.92, 9.21, 10.64, 13.13, 11.17, 8.75, 9.16, 5.92, 9.32) +) + +# Apply function to rural and urban data +datt_rural <- calculate_cumulative_shares(rural_data, "rural") +datt_urban <- calculate_cumulative_shares(urban_data, "urban") + + +# Usethis +usethis::use_data(datt_rural, datt_urban, overwrite = TRUE) diff --git a/data/datt_rural.rda b/data/datt_rural.rda new file mode 100644 index 0000000000000000000000000000000000000000..918e14bbfaf9a3ca0175858b4b9b8fb181e5222a GIT binary patch literal 794 zcmV+#1LgceT4*^jL0KkKS?E$X2LJ(6|NsC0-_iYPf7Acm``!Q7|M>QG@3g6TS4UjO zRlIlq-&eo_95uDIfYcP#3>r;0s7I-$jT&GPlR>AbFoV>@zyJ|~F%7A;9;Qt(2GVSy z>N7?m8%gS6pn8m_rUDTPo|9!7Jf_kB00000&;ZaLpa1{>01Y$-fB*mh0000802%{E znluKOg9rvjLqVaY5MY>^4FEI%0|d}8OoIr}WHi$WlMn+yk|7a1Qe8-y86YIG5mJFNLOa6&q-Dm0Dqv8x6fcVzB(Tg^ zG6Uahbb@G^m}D@D4dqpC22GN=Fl{9|g1(!>3{}7)N~%%X)^e4RftsxL?A`7m}$(pU=iQ>)7h_vFgTZSl7!~p&{1WBIB`12e~X*Z<;_L= zbx_iDMIc)5Ff>{F3ejH>f%3d&(IE*85LmXnj_FWHpY61g{2l)6Z&4(3wp11jABx_z z$)vXuq_7``$0kt=Ot{7{MRss+9{8vZMj6BXa?g|w5D=LTKCp%!AQ2LczZEHaUbWpj zY*G_OC1}3<9(5^rK6Rw(rbBw@a=~DMO)w|u>^Ji>aru8)w?WA66jzi$foRAAe=x88 zpP1W5WNf9PyC@ovdjpC$ zdM0X453)(B(K7(Rq%yb|01)>MjXJo>i-P&wk!R2_DKHDqD~FE`Heo+|QwjK81i(DJ Y#`v{Plo(wh7z@HKGGL~TruQ1qUm>NFV`Xlh^yngO6j zOrDud7}V3$+McJTpnj7R4I%?W(lpaXhMF1yk)UalL6FGPBSt`Jp^yeaqeD!9z)b;! z05Sm6K+%ZMXfy{;@}%_|G-wSP00TonGynk53_xR0X^I)m^9F%Dc63!q1ya0Ge zAV?+ff`Tm*000LBSIP+PjF=U!5~eAbfJ!k5VkfK_ad`+2Qxs6Hx77n@3?MsN*nfKr zjhpaat$^(j%i5ab>kP7N$1OD4?;!0(jJ-8*Zp9Ip;4(bhz!b~$EC7-jcmcp6aRdR} zr}x#puq*?Z(K371pHl;f6@twPwJ#W$p@;Km%iWLDR{@GIZ`RRDSB9`^q{O7uYDI!AeW$n6ZxJl-1Hj-H3J* z#pYHbkhO|gH8)vLGM}%!TM0oquyqSaYz%mm4Dx8#G%Vzj+46{y(hD|?*sjso0{jA| ziEvD@M`hBnb^ULxT1J!+^gsxqN?#S?KujPR&{Or0=dayeT;Tjgp9d`X#Ef(ov_F7u zByHN}zKa`#sAXG+oI&dc-=?^z&LS~~5*F#`L)myt&WlN{LNMK$! F@PK(Vb-(}s literal 0 HcmV?d00001 diff --git a/vignettes/articles/get-gd-example.Rmd b/vignettes/articles/get-gd-example.Rmd index 52523b8..d5469ef 100644 --- a/vignettes/articles/get-gd-example.Rmd +++ b/vignettes/articles/get-gd-example.Rmd @@ -33,21 +33,21 @@ Here are a few examples to get you started: By default, `get_gd()` returns grouped statistics (`estimate = "stats"`) based on cumulative welfare (`cum_welfare`) and population values (`cum_population`), both expressed as percentages. The default mean (`requested_mean`) and poverty line (`povline`) are set to 1, so the user should specify the known mean of the distribution, and the desired poverty line. -The data used in this example is from Datt (1998). The dataset lists the cumulative welfare and population values for rural India in 1983 expressed in shares (percentages). The mean of the distribution is 109.9 Rs (daily), and the poverty line at the time was 89 Rs. Note that the cumulative welfare and population values should be monotonically increasing and sum to 1 to be valid. Additionally, the cumulative population values should always be greater or equal to the corresponding welfare values. +The data used in this example is from Datt (1998). Among other consumption survey variables, The dataset lists the cumulative welfare(`L`, for Lorenz) and population values(`p`) for rural India in 1983, expressed in shares (percentages). The mean of the distribution is 109.9 Rs (daily), and the poverty line at the time was 89 Rs. Note that the cumulative welfare and population values should be monotonically increasing and sum to 1 to be valid. Additionally, the cumulative population values should always be greater or equal to the corresponding welfare values. ```{r} -datt_data <- data.frame(p = c(0.0092, 0.0339, 0.0850, 0.160, 0.2609, 0.4133, 0.5497, 0.7196, 0.8196, 0.9174, 0.9570, 0.9751, 1), - L = c(0.00208, 0.001013, 0.03122, 0.07083, 0.12808, 0.23498, 0.34887, 0.51994, 0.64270, 0.79201, 0.86966, 0.91277, 1)) datt_mean <- 109.9 datt_povline <- 89 + +str(datt_rural) ``` To retrieve basic grouped statistics, you need to provide cumulative welfare and population values along with the requested mean and poverty line. -```{r, warning=FALSE, message=FALSE, eval=FALSE} +```{r, warning=FALSE, message=FALSE} get_gd( - cum_welfare = datt_data$L, - cum_population = datt_data$p, + cum_welfare = datt_rural$L, + cum_population = datt_rural$p, estimate = "stats", requested_mean = datt_mean, povline = datt_povline @@ -55,10 +55,10 @@ get_gd( ``` As an alternative, instead of the mean, you can provide the population share (`popshare`), which will be assumed equal to the poverty headcount ratio, and used to calculate the rest of the statistics (and the poverty line itself): -```{r, warning=FALSE, message=FALSE, eval=FALSE} +```{r, warning=FALSE, message=FALSE} get_gd( - cum_welfare = datt_data$L, - cum_population = datt_data$p, + cum_welfare = datt_rural$L, + cum_population = datt_rural$p, estimate = "stats", requested_mean = datt_mean, popshare = 0.3 @@ -71,24 +71,28 @@ get_gd( To retrieve Lorenz curve data, you can specify `estimate = "lorenz"` and provide the number of bins (`n_bins`) to return (there is no default value for `n_bins`). The Lorenz curve will be estimated with both the Beta Lorenz and Quadratic Lorenz methodologies, then the best one will be selected by default. -```{r, warning=FALSE, message=FALSE, eval=FALSE} -get_gd( - cum_welfare = datt_data$L, - cum_population = dat_data$p, +```{r, warning=FALSE, message=FALSE} +lorenz_100 <- get_gd( + cum_welfare = datt_rural$L, + cum_population = datt_rural$p, estimate = "lorenz", - n_bins = 100 + n_bins = 100, + server = "dev" ) + +str(lorenz_100) ``` You can also specify the Lorenz curve methodology by setting the `lorenz` parameter to either `"lb"` (Beta Lorenz) or `"lq"` (Quadratic Lorenz). -```{r, warning=FALSE, message=FALSE, eval=FALSE} +```{r, warning=FALSE, message=FALSE} get_gd( - cum_welfare = datt_data$L, - cum_population = dat_data$p, + cum_welfare = datt_rural$L, + cum_population = datt_rural$p, estimate = "lorenz", lorenz = "lb", - n_bins = 100 + n_bins = 100, + server = "dev" ) ``` @@ -97,10 +101,10 @@ get_gd( Finally, you can retrieve the regression parameters used for the Lorenz curve estimation by setting `estimate = "params"`. The methods used, both the Beta Lorenz and the Quadratic Lorenz, are described in detail in Datt (1998). -```{r, warning=FALSE, message=FALSE, eval=FALSE} +```{r, warning=FALSE, message=FALSE} get_gd( - cum_welfare = datt_data$L, - cum_population = dat_data$p, + cum_welfare = datt_rural$L, + cum_population = datt_rural$p, estimate = "params" ) ``` @@ -109,8 +113,10 @@ The variable `selected_for_dist` shows the Lorenz curve methodology selected by ## Use cases -### Poverty and Inequality Statistics -```{r, warning=FALSE, message=FALSE, eval=FALSE} +We now show three examples of how the `get_gd()` function could be integrated in a workflow to analyze and visualize group data. + +### Poverty Line vs Poverty Measures +```{r, warning=FALSE, message=FALSE} datt_mean <- 109.9 datt_povline <- 89 @@ -124,8 +130,8 @@ poverty_stats <- data.frame() # Loop over poverty lines to compute poverty measures for (pl in poverty_lines) { stats <- get_gd( - cum_welfare = datt_data$L, - cum_population = datt_data$p, + cum_welfare = datt_rural$L, + cum_population = datt_rural$p, estimate = "stats", requested_mean = 109.9, povline = pl @@ -149,25 +155,18 @@ ggplot(poverty_stats, aes(x = poverty_line)) + color = "Poverty Measure" ) + theme_minimal() - ``` ### Lorenz Curve Data -```{r, warning=FALSE, message=FALSE, eval=FALSE} -lorenz_points_lq <- pipgd_lorenz_curve(datt_data$L, datt_data$p, lorenz = "lq") -lorenz_points_lq_10 <- pipgd_lorenz_curve(datt_data$L, datt_data$p, lorenz = "lq", n_bins = 10) - -lorenz_df_lq_100 <- data.frame(cum_welfare = lorenz_points_lq$lorenz_curve$output, - cum_population = lorenz_points_lq$lorenz_curve$points) - -lorenz_df_lq_10 <- data.frame(cum_welfare = lorenz_points_lq_10$lorenz_curve$output, - cum_population = lorenz_points_lq_10$lorenz_curve$points) +```{r, warning=FALSE, message=FALSE} +lorenz_points_lq <- get_gd(datt_data$L, datt_data$p, estimate = "lorenz", lorenz = "lq", server = "dev") +lorenz_points_lq_10 <- get_gd(datt_data$L, datt_data$p, estimate = "lorenz", lorenz = "lq", n_bins = 10, server = "dev") ggplot() + - geom_bar(data = lorenz_df_lq_10, aes(x = cum_population, y = cum_welfare), stat = "identity", fill = "blue", alpha = 0.3) + - geom_bar(data = datt_data, aes(x = p, y = L), stat = "identity", fill = "blue") + - geom_point(data = lorenz_df_lq_100, aes(x = cum_population, y = cum_welfare), color = "blue", size = 0.5) + - geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") + + geom_bar(data = lorenz_points_lq_10, aes(x = weight, y = welfare), stat = "identity", fill = "darkorange", alpha = 0.3) + + geom_point(data = datt_data, aes(x = p, y = L), color = "darkorange", size = 2) + + geom_point(data = lorenz_points_lq, aes(x = weight, y = welfare), color = "darkorange", size = 0.5) + + geom_abline(intercept = 0, slope = 1, color = "black") + labs( title = "Lorenz Curve for Rural India, 1983", x = "Cumulative Share of Population", @@ -175,11 +174,28 @@ ggplot() + ) + theme_minimal() ``` +### Rural vs Urban +```{r, warning=FALSE, message=FALSE} +lorenz_rural <- get_gd(datt_rural$L, datt_rural$p, estimate = "lorenz", lorenz = "lq", server = "dev", n_bins = 1000) +lorenz_urban <- get_gd(datt_urban$L, datt_urban$p, estimate = "lorenz", lorenz = "lq", server = "dev", n_bins = 1000) +ggplot() + + geom_point(data = lorenz_rural, aes(x = weight, y = welfare), color = "darkorange", size = 0.5) + + geom_point(data = lorenz_urban, aes(x = weight, y = welfare), color = "steelblue", size = 0.5) + + geom_abline(intercept = 0, slope = 1, color = "black") + + labs( + title = "Lorenz Curve for India, 1983", + x = "Cumulative Share of Population", + y = "Cumulative Share of Welfare" + ) + + theme_minimal() +``` ## References - Datt, Gaurav (1998). "Computational tools for poverty measurement and analysis." FCND Discussion Paper 50. International Food Policy Research Institute (IFPRI). Washington, DC. [Link](https://www.ifpri.org/publication/computational-tools-poverty-measurement-and-analysis) +- Sarveskhana, Vol IX n 4 (April 1986). [Link](https://mospi.gov.in/sites/default/files/publication_reports/Sarvekshana_issue%20no.%2027_%20Vol%20%209%20No%204.PDF) + From 0cd973fe73c376d76b444606c4c25760685943f2 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Fri, 15 Nov 2024 12:28:48 -0500 Subject: [PATCH 31/44] get_gd some changes + full coverage with tests for get_gd() --- R/get_gd.R | 13 +++-- man/get_gd.Rd | 50 +++++++++++++++-- tests/testthat/test-get_gd.R | 105 +++++++++++++++++++++++++++++++++++ 3 files changed, 160 insertions(+), 8 deletions(-) create mode 100644 tests/testthat/test-get_gd.R diff --git a/R/get_gd.R b/R/get_gd.R index f772047..1d7571c 100644 --- a/R/get_gd.R +++ b/R/get_gd.R @@ -8,13 +8,18 @@ #' @param estimate character: One of "stats", "lorenz", "params". #' @param requested_mean numeric: Requested mean. #' @param povline numeric: Poverty line. Required for estimate = "stats". +#' @param lorenz character: Lorenz curve methodology. Either "lb" or "lq". #' @param n_bins numeric: Number of bins. Required for estimate = "lorenz". #' #' @examples #' \dontrun{ #' -#' datt_data <- data.frame(p = c(0.0092, 0.0339, 0.0850, 0.160, 0.2609, 0.4133, 0.5497, 0.7196, 0.8196, 0.9174, 0.9570, 0.9751, 1), -#' L = c(0.00208, 0.001013, 0.03122, 0.07083, 0.12808, 0.23498, 0.34887, 051994, 0.64270, 0.79201, 0.86966, 0.91277, 1) +#' datt_data <- data.frame(p = c(0.0092, 0.0339, 0.0850, 0.160, 0.2609, 0.4133, +#' 0.5497, 0.7196, 0.8196, 0.9174, 0.9570, 0.9751, +#' 1), +#' L = c(0.00208, 0.001013, 0.03122, 0.07083, 0.12808, +#' 0.23498, 0.34887, 051994, 0.64270, 0.79201, +#' 0.86966, 0.91277, 1) #' #' # estimate = 'stats': retrieve poverty statistics. #' stats <- get_gd(cum_welfare = datt_data$L, cum_population = datt_data$p, @@ -54,8 +59,8 @@ get_gd <- function(cum_welfare = NULL, cum_population = NULL, estimate = c("stats", "lorenz", "params"), - requested_mean = 1, # stats specific. - povline = 1, # stats specific. + requested_mean = NULL, # stats specific. + povline = NULL, # stats specific. popshare = NULL, # stats specific. lorenz = NULL, # lorenz specific. n_bins = NULL, # lorenz specific. diff --git a/man/get_gd.Rd b/man/get_gd.Rd index dd7e5ad..bc8e226 100644 --- a/man/get_gd.Rd +++ b/man/get_gd.Rd @@ -11,6 +11,7 @@ get_gd( requested_mean = 1, povline = 1, popshare = NULL, + lorenz = NULL, n_bins = NULL, api_version = "v1", format = c("rds", "json", "csv"), @@ -19,20 +20,20 @@ get_gd( ) } \arguments{ -\item{cum_welfare}{numeric: Cumulative welfare values.} +\item{cum_welfare}{numeric: Cumulative welfare values, expressed in shares. Any length. They should be monotonically increasing, and sum to 1.} -\item{cum_population}{numeric: Cumulative population values.} +\item{cum_population}{numeric: Cumulative population values, expressed in shares. Any length. They should be monotonically increasing, and sum to 1.} \item{estimate}{character: One of "stats", "lorenz", "params".} \item{requested_mean}{numeric: Requested mean.} -\item{povline}{numeric: Poverty line. Required for endpoint = "grouped-stats".} +\item{povline}{numeric: Poverty line. Required for estimate = "stats".} \item{popshare}{numeric: Proportion of the population living below the poverty line} -\item{n_bins}{numeric: Number of bins. Required for endpoint = "lorenz-curve".} +\item{n_bins}{numeric: Number of bins. Required for estimate = "lorenz".} \item{api_version}{character: API version} @@ -46,3 +47,44 @@ poverty line} \description{ Get grouped stats from the PIP API. } +\examples{ +\dontrun{ + +datt_data <- data.frame(p = c(0.0092, 0.0339, 0.0850, 0.160, 0.2609, 0.4133, 0.5497, 0.7196, 0.8196, 0.9174, 0.9570, 0.9751, 1), + L = c(0.00208, 0.001013, 0.03122, 0.07083, 0.12808, 0.23498, 0.34887, 051994, 0.64270, 0.79201, 0.86966, 0.91277, 1) + +# estimate = 'stats': retrieve poverty statistics. +stats <- get_gd(cum_welfare = datt_data$L, cum_population = datt_data$p, + estimate = "stats", + requested_mean = 19, # default is 1. + povline = 2.15) # default is 1. + +# estimate = 'lorenz': retrieve Lorenz curve data points for a specified number of bins. + +## Best lorenz curve methodolody selected by default: +lorenz <- get_gd(cum_welfare = datt_data$L, + cum_population = datt_data$p, + estimate = "lorenz", + n_bins = 100) # must be specified, default is NULL. + +## Specify lorenz curve methodology: +### Beta Lorenz ("lb") +lorenz_lb <- get_gd(cum_welfare = datt_data$L, + cum_population = datt_data$p, + estimate = "lorenz", + lorenz = "lb", + n_bins = 100) + +### Quadratic Lorenz ("lq") +lorenz_lq <- get_gd(cum_welfare = datt_data$L, + cum_population = datt_data$p, + estimate = "lorenz", + lorenz = "lq", + n_bins = 100) + +# estimate = 'params': retrieve regression parameters used for the lorenz curve estimation. +params <- get_gd(cum_welfare = datt_data$L, + cum_population = datt_data$p, + estimate = "params") +} +} diff --git a/tests/testthat/test-get_gd.R b/tests/testthat/test-get_gd.R new file mode 100644 index 0000000..d39e73f --- /dev/null +++ b/tests/testthat/test-get_gd.R @@ -0,0 +1,105 @@ +# Section 1: General Argument Checks ---- +test_that("General Argument Checks", { + + # Check if cum_welfare and cum_population have different lengths + cum_welfare <- c(0.1, 0.2, 0.3) + cum_population <- c(0.1, 0.2) + + expect_error( + get_gd(cum_welfare = cum_welfare, cum_population = cum_population, estimate = "stats"), + "must have the same length" + ) +}) + +## Section 2: Testing 'stats' Endpoint ----- +test_that("'stats' Endpoint Tests", { + + # Error when requested_mean or povline is missing + cum_welfare <- datt_data$L + cum_population <- datt_data$p + + # Missing requested_mean + expect_error( + get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "stats", povline = 89), + "must be provided" + ) + + # Missing povline + expect_error( + get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "stats", requested_mean = 109.9), + "must be provided" + ) + + # Correct retrieval of statistics + stats <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "stats", requested_mean = 109.9, povline = 89) + + expect_s3_class(stats, "data.frame") # The returned object should be a data frame + #expect_true(all(c("poverty_line", "mean", "headcount") %in% names(stats))) + + # popshare + stats <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "stats", requested_mean = 109.9, popshare = 0.5, + povline = 89) + + expect_true(stats$headcount == 0.5) +}) + +### Section 3: Testing 'lorenz' Endpoint ---- +test_that("'lorenz' Endpoint Tests", { + + cum_welfare <- datt_rural$L + cum_population <- datt_rural$p + + # Correct retrieval of Lorenz curve data points + lorenz <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "lorenz", n_bins = 10, server = "dev") + + expect_s3_class(lorenz, "data.frame") + expect_true(all(c("weight", "welfare") %in% names(lorenz))) + expect_equal(nrow(lorenz), 10) + + # Handling of lorenz methodology + lorenz_lb <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "lorenz", lorenz = "lb", n_bins = 100, server = "dev") + + expect_s3_class(lorenz_lb, "data.frame") + expect_true(all(c("weight", "welfare") %in% names(lorenz_lb))) + + lorenz_lq <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "lorenz", lorenz = "lq", n_bins = 100, server = "dev") + + expect_s3_class(lorenz_lq, "data.frame") + expect_true(all(c("weight", "welfare") %in% names(lorenz_lq))) +}) + +### Section 4: Testing 'params' Endpoint ----- +test_that("'params' Endpoint Tests", { + + cum_welfare <- datt_data$L + cum_population <- datt_data$p + + # Proper retrieval of regression parameters + params <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, estimate = "params") + + expect_s3_class(params, "data.frame") + expect_true(all(c("A", "B") %in% names(params))) # Check some example parameter names +}) + +### Section 5: Additional ----- +test_that("Edge Cases and Error Handling", { + + # Both cum_welfare and cum_population missing + expect_error( + get_gd(estimate = "stats", requested_mean = 19, povline = 2.15) + ) + + # Invalid lorenz method + expect_error( + get_gd(cum_welfare = datt_data$L, cum_population = datt_data$p, + estimate = "lorenz", lorenz = "invalid_method", n_bins = 10, server = "dev"), + "You supplied an invalid value for lorenz" + ) +}) From 884c4af3ee16199481c4bb48fed898baeab35aa1 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Mon, 18 Nov 2024 10:47:44 -0500 Subject: [PATCH 32/44] get_cp() and get_cp_ki() with vignette done --- NAMESPACE | 1 + R/get_cp_ki.R | 127 ++++++++++++++++++++++++++ man/get_cp.Rd | 11 ++- man/get_cp_ki.Rd | 61 +++++++++++++ vignettes/articles/get-cp-example.Rmd | 82 +++++++++++++++++ 5 files changed, 278 insertions(+), 4 deletions(-) create mode 100644 R/get_cp_ki.R create mode 100644 man/get_cp_ki.Rd create mode 100644 vignettes/articles/get-cp-example.Rmd diff --git a/NAMESPACE b/NAMESPACE index ddc6ba4..4a76a5b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(display_aux) export(get_aux) export(get_cache_info) export(get_cp) +export(get_cp_ki) export(get_pip_info) export(get_stats) export(get_versions) diff --git a/R/get_cp_ki.R b/R/get_cp_ki.R new file mode 100644 index 0000000..908e782 --- /dev/null +++ b/R/get_cp_ki.R @@ -0,0 +1,127 @@ +#' Get Country Profiles Key Indicators +#' +#' @inheritParams get_stats +#' +#' @return If `simplify = FALSE`, it returns a list of class "pip_api". If +#' `simplify = TRUE`, it returns a tibble with the requested data. This is the +#' default. Only for `get_aux()`, If `assign_tb = TRUE` or character, it +#' returns TRUE when data was assign properly to .pip env. FALSE, if it was +#' not assigned. +#' @export +#' +#' @examples +#' \dontrun{ +#' # One country, all years with default ppp_version = 2017 +#' res <- get_cp(country = "IDN") +#' +#' # All countries, povline = 1.9 +#' res <- get_cp(country = "IDN", povline = 1.9) +#' +#' } +get_cp_ki <- function(country = NULL, + povline = 2.15, # GC: default value like Stata + version = NULL, + ppp_version = 2017, # GC: default value like Stata + release_version = NULL, + api_version = "v1", + format = c("arrow", "rds", "json", "csv"), + simplify = TRUE, + server = NULL) { + + + # 0. Match args ---- + api_version <- match.arg(api_version) + format <- match.arg(format) + + # 1. povline set-up ---- + # (GC: stata equivalent but no 2005 and default to 2.15) + if (is.null(povline)) { + if (ppp_version == "2011") { + povline <- 1.9 + } else { + povline <- 2.15 + } + } + + # 2. country set-up ---- + if (is.null(country)) { + cli::cli_abort("Please provide a country code.") + } + + if (length(country) > 1) { + cli::cli_abort("Please provide only one country code.") + } + + + # 2. Build query string ---- + req <- build_request( + country = country, + povline = povline, + version = version, + ppp_version = ppp_version, + release_version = release_version, + format = format, + server = server, + api_version = api_version, + endpoint = "cp-key-indicators" + ) + + + # 3. Perform request ---- + res <- req |> + httr2::req_perform() + + # 4. Parse result and return (if simplify == FALSE) + if (!simplify) { + out <- parse_response(res, simplify) + } else { + out <- parse_response(res, simplify) + + # 5. Unnest + out <- unnest_ki(out) + + } + + return(out) + +} + + +# Unnest ki (no collapse/tidyverse) ---- +#' Unnest the key indicators +#' @describeIn unnest_ki() takes the simplified output from cp-key-indicators endpoint and unnests it. +#' +#' @param out parsed and simplified output from cp-key-indicators endpoint +#' +#' @return data frame, unnested. +#' +unnest_ki <- function(out){ + + # Step 1: Extract all data frames into individual variables + headcount <- out$headcount[[1]] + headcount_national <- out$headcount_national[[1]] + mpm_headcount <- out$mpm_headcount[[1]] + pop <- out$pop[[1]] + + gni <- out$gni[[1]] + gni <- gni[!duplicated(gni[c("country_code", "reporting_year")]), ] + + + gdp_growth <- out$gdp_growth[[1]] + gdp_growth <- gdp_growth[!duplicated(gdp_growth[c("country_code", "reporting_year")]), ] + + shared_prosperity <- out$shared_prosperity[[1]] + + # Step 2: Merge data frames on common columns + df_merged <- merge(headcount, headcount_national, by = c("country_code", "reporting_year"), all = TRUE) + df_merged <- merge(df_merged, mpm_headcount, by = c("country_code", "reporting_year"), all = TRUE) + df_merged <- merge(df_merged, pop, by = c("country_code", "reporting_year"), all = TRUE) + df_merged <- merge(df_merged, gni, by = c("country_code", "reporting_year"), all = TRUE) + df_merged <- merge(df_merged, gdp_growth, by = c("country_code", "reporting_year"), all = TRUE, suffixes = c("_gni", "_gdp")) + final_df <- merge(df_merged, shared_prosperity, by = "country_code", all = TRUE) + + # Step 3: return + return(final_df) + +} + diff --git a/man/get_cp.Rd b/man/get_cp.Rd index 599b315..aff98aa 100644 --- a/man/get_cp.Rd +++ b/man/get_cp.Rd @@ -6,9 +6,9 @@ \usage{ get_cp( country = "all", - povline = NULL, + povline = 2.15, version = NULL, - ppp_version = NULL, + ppp_version = 2017, release_version = NULL, api_version = "v1", format = c("arrow", "rds", "json", "csv"), @@ -49,10 +49,13 @@ Get Country Profiles } \examples{ \dontrun{ -# One country-year +# One country, all years with default ppp_version = 2017 res <- get_cp(country = "AGO") -# All countries and years +# All countries, povline = 1.9 +res <- get_cp(povline = 1.9) + +# All countries and years with default values res <- get_cp() } } diff --git a/man/get_cp_ki.Rd b/man/get_cp_ki.Rd new file mode 100644 index 0000000..204f9b7 --- /dev/null +++ b/man/get_cp_ki.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_cp_ki.R +\name{get_cp_ki} +\alias{get_cp_ki} +\title{Get Country Profiles Key Indicators} +\usage{ +get_cp_ki( + country = NULL, + povline = 2.15, + version = NULL, + ppp_version = 2017, + release_version = NULL, + api_version = "v1", + format = c("arrow", "rds", "json", "csv"), + simplify = TRUE, + server = NULL +) +} +\arguments{ +\item{country}{character: A vector with one or more \href{https://wits.worldbank.org/wits/wits/witshelp/content/codes/country_codes.htm}{country ISO 3 codes} or +'all'} + +\item{povline}{numeric: Poverty line} + +\item{version}{character: Data version. See \code{get_versions()}} + +\item{ppp_version}{ppp year to be used} + +\item{release_version}{date when the data was published in YYYYMMDD format} + +\item{api_version}{character: API version} + +\item{format}{character: Response format either of c("rds", "json", "csv")} + +\item{simplify}{logical: If TRUE (the default) the response is returned as a +\code{tibble}} + +\item{server}{character: Server. For WB internal use only} +} +\value{ +If \code{simplify = FALSE}, it returns a list of class "pip_api". If +\code{simplify = TRUE}, it returns a tibble with the requested data. This is the +default. Only for \code{get_aux()}, If \code{assign_tb = TRUE} or character, it +returns TRUE when data was assign properly to .pip env. FALSE, if it was +not assigned. +} +\description{ +Get Country Profiles Key Indicators +} +\examples{ +\dontrun{ +# One country, all years with default ppp_version = 2017 +res <- get_cp(country = "AGO") + +# All countries, povline = 1.9 +res <- get_cp(povline = 1.9) + +# All countries and years with default values +res <- get_cp() +} +} diff --git a/vignettes/articles/get-cp-example.Rmd b/vignettes/articles/get-cp-example.Rmd new file mode 100644 index 0000000..d8e6efa --- /dev/null +++ b/vignettes/articles/get-cp-example.Rmd @@ -0,0 +1,82 @@ +--- +title: "get-cp-example" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{pipr} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE, message=FALSE, warning=FALSE} +NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true") +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + purl = NOT_CRAN +) +``` + +## Overview + +The `get_cp()` and `get_cp_ki()` functions provide **country profile data**, also available in [PIP's Country Profile page](https://pip.worldbank.org/country-profiles/IDN). This dataset comes from the [Global Monitoring Indicators (GMI)](https://datacatalog.worldbank.org/int/search/dataset/0065517/global-monitoring-indicators--gmi-), a set of harmonized indicators produced from the Global Monitoring Database (GMD) [^1] + +[^1]: The Global Monitoring Database (GMD) is the World Bank’s repository of multi-topic income and expenditure household surveys used to monitor global poverty and shared prosperity. The data comes from household surveys collected by the national statistical office in each country. It is then compiled, processed, and harmonized, so that levels and trends in poverty and other key socio-demographic attributes can be compared across and within countries over time. + +The country profile variables shown by the country profile page (and returned by default by `get_cp_ki` and `get_cp()`) are for the latest available year or comparable spell of years, including a **default poverty line** (\$2.15), and a **default PPP version** (2017). + +## Country Profile Key Indicators + +### Overview + +`get_cp_ki()` returns the key indicators listed at the beginning of the country profile page. These indicators are: + +- `headcount`: Poverty rate at the default international poverty line (% population). +- `headcount_national`: Poverty rate at the national poverty line (% population). +- `mpm_headcount`: Multidimensional poverty headcount ratio (% population). +- `share_below_40`: Annualized growth in per capita mean consumption or income (%) for the bottom 40%. +- `share_total`: Annualized growth in per capita mean consumption or income (%) (total). +- `pop`: Population, total (millions). +- `gni_pc`: GNI per capita, Atlas method (current US\$). +- `gdp_growth`GDP growth (annual %). + +### Usage + +`get_cp_ki` works for single countries only, and returns a data frame with the requested data. + +```{r} +get_cp_ki(country = "IDN") +# Note: get_cp_ki(country = c("IDN", "ITA")) # this won't work! +``` + +As a default, the function returns the latest available year or comparable spell of years, with poverty metrics calculated at the latest international poverty line of \$2.15 and the default PPP version of 2017. The user can change these by passing the respective arguments to the function. + +```{r} +get_cp_ki(country = "IDN", povline = 1.9) + +``` + +## Country Profile Poverty Indicators + +### Overview + +`get_cp()` returns a data frame with the data used to produce the country profile page poverty and inequality metrics visualizations. There are 47 variables included within the following categories: + +- **poverty headcounts**: poverty headcounts at different poverty lines, calculated for all years available. +- **inequality metrics**: Gini and Thiel Indexes, calculated for all years available. +- **shares of given demographic in bottom 40%/top 60%**: calculated for the latest available year. +- **multidimensional poverty indexes**: calculated for all years available. + +### Usage + +`get_cp()` default returns all data available (`country = all`) for the default poverty line (\$2.15) and PPP version (2017). If the user specifies `ppp_version = 2011`, the default poverty line will be \$1.9. + +```{r eval=FALSE} +get_cp() # all countries, default poverty line (2.15) and PPP version (2017) +get_cp(ppp_version = 2011) # all countries, poverty line 1.9. +``` + +The user can also specify a single country or a list of countries. The returned object will be a data frame with the requested data. + +```{r} +get_cp(country = "IDN") # single country +``` From b288fff4f60c4ff3e5128fb83e1774e22553fa89 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Mon, 18 Nov 2024 11:11:16 -0500 Subject: [PATCH 33/44] get_cp() tests done, coverage 100% --- R/get_cp.R | 2 -- tests/testthat/test-get_cp.R | 58 ++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 tests/testthat/test-get_cp.R diff --git a/R/get_cp.R b/R/get_cp.R index 2b8084d..356eaab 100644 --- a/R/get_cp.R +++ b/R/get_cp.R @@ -40,8 +40,6 @@ get_cp <- function(country = "all", if (is.null(povline)) { if (ppp_version == "2011") { povline <- 1.9 - } else { - povline <- 2.15 } } diff --git a/tests/testthat/test-get_cp.R b/tests/testthat/test-get_cp.R new file mode 100644 index 0000000..4b71205 --- /dev/null +++ b/tests/testthat/test-get_cp.R @@ -0,0 +1,58 @@ +library(testthat) +library(pipr) + +# 1. General Argument Matching Tests ---- +test_that("Argument matching works correctly for get_cp()", { + skip_if_offline() + skip_on_cran() + # Default arguments, one country, simplify = TRUE + res <- get_cp(country = "AGO") + expect_type(res, "list") # Default simplify = TRUE returns a data.frame + + # Default arguments, one country, simplify = FALSE + res <- get_cp(country = "AGO", simplify = FALSE) + expect_s3_class(res, "pip_api") + + # Argument matching for 'format' + expect_error(get_cp(format = "txt"), "'arg' should be one of") + + # Argument matching for 'api_version' + expect_error(get_cp(api_version = "v2"), "'arg' should be") + +}) + +# 2. povline Set-up Tests ---- +test_that("povline and ppp_version arguments work correctly", { + skip_if_offline() + skip_on_cran() + # Default povline + res <- get_cp(country = "AGO") + expect_true(any(res$poverty_line == 2.15)) + + res <- get_cp(country = "AGO", ppp_version = 2017) + expect_true(any(res$poverty_line == 2.15)) + + # povline with ppp_version 2011 + res <- get_cp(country = "AGO", ppp_version = 2011, povline = NULL) + expect_true(any(res$poverty_line == 1.9)) + + # povline when povline is provided + res <- get_cp(country = "AGO", povline = 3.2) + expect_true(any(res$poverty_line == 3.2)) +}) + + +# 3. Other Tests ---- +test_that("Requests execute successfully for get_cp()", { + skip_if_offline() + skip_on_cran() + + # Check that the response for invalid country throws an error + expect_error(get_cp(country = "INVALID"), "404") + + # All countries with a povline + res <- get_cp(country = "all", povline = 2.15) + expect_true(is.data.frame(res) || inherits(res, "pip_api")) +}) + + From 7e8f553adeb44f56121f92f17d8d416033324aa3 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Mon, 18 Nov 2024 11:25:51 -0500 Subject: [PATCH 34/44] minor adjustment and full coverage --- R/get_cp_ki.R | 4 +- tests/testthat/test-get_cp.R | 3 - tests/testthat/test-get_cp_ki.R | 97 +++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 6 deletions(-) create mode 100644 tests/testthat/test-get_cp_ki.R diff --git a/R/get_cp_ki.R b/R/get_cp_ki.R index 908e782..6853c86 100644 --- a/R/get_cp_ki.R +++ b/R/get_cp_ki.R @@ -38,8 +38,6 @@ get_cp_ki <- function(country = NULL, if (is.null(povline)) { if (ppp_version == "2011") { povline <- 1.9 - } else { - povline <- 2.15 } } @@ -72,7 +70,7 @@ get_cp_ki <- function(country = NULL, httr2::req_perform() # 4. Parse result and return (if simplify == FALSE) - if (!simplify) { + if (isFALSE(simplify)) { out <- parse_response(res, simplify) } else { out <- parse_response(res, simplify) diff --git a/tests/testthat/test-get_cp.R b/tests/testthat/test-get_cp.R index 4b71205..e7e627d 100644 --- a/tests/testthat/test-get_cp.R +++ b/tests/testthat/test-get_cp.R @@ -1,6 +1,3 @@ -library(testthat) -library(pipr) - # 1. General Argument Matching Tests ---- test_that("Argument matching works correctly for get_cp()", { skip_if_offline() diff --git a/tests/testthat/test-get_cp_ki.R b/tests/testthat/test-get_cp_ki.R new file mode 100644 index 0000000..16db725 --- /dev/null +++ b/tests/testthat/test-get_cp_ki.R @@ -0,0 +1,97 @@ + +# 1. General Argument Matching Tests ---- +test_that("Argument matching works correctly for get_cp_ki()", { + skip_if_offline() + skip_on_cran() + + # Default arguments, one country, simplify = TRUE + res <- get_cp_ki(country = "IDN") + expect_type(res, "list") + + # Default arguments, one country, simplify = FALSE + res <- get_cp_ki(country = "IDN", simplify = FALSE) + expect_s3_class(res, "pip_api") + + # Argument matching for 'format' + expect_error(get_cp_ki(format = "txt"), "'arg' should be one of") + + # Argument matching for 'api_version' + expect_error(get_cp_ki(api_version = "v2"), "'arg' should be") +}) + +# 2. povline Set-up Tests ---- +test_that("povline and ppp_version arguments work correctly for get_cp_ki()", { + skip_if_offline() + skip_on_cran() + + # Default povline + res <- get_cp_ki(country = "IDN") + expect_true(any(res$poverty_line == 2.15)) + + res <- get_cp_ki(country = "IDN", ppp_version = 2017) + expect_true(any(res$poverty_line == 2.15)) + + # povline with ppp_version 2011 + res <- get_cp_ki(country = "IDN", ppp_version = 2011, povline = NULL) + expect_true(any(res$poverty_line == 1.9)) + + # povline when povline is provided + res <- get_cp_ki(country = "IDN", povline = 3.2) + expect_true(any(res$poverty_line == 3.2)) +}) + +# 3. Country Argument Tests ---- +test_that("Country argument validation works correctly in get_cp_ki()", { + skip_if_offline() + skip_on_cran() + + # Valid country + res <- get_cp_ki(country = "IDN") + expect_type(res, "list") + + # Missing country argument + expect_error(get_cp_ki(country = NULL), "Please provide a country code.") + + # More than one country + expect_error(get_cp_ki(country = c("IDN", "AGO")), "Please provide only one country code.") +}) + + + +# 4. Request Execution Tests ---- +test_that("Requests threws error for get_cp_ki()", { + skip_if_offline() + skip_on_cran() + + # Check that the response for invalid country throws an error + expect_error(get_cp_ki(country = "INVALID"), "404") +}) + +# 6. Response Parsing and Unnesting Tests ---- +test_that("Response parsing and unnesting work correctly for get_cp_ki()", { + skip_if_offline() + skip_on_cran() + + # Build unnested dataset manually + res_manual <- data.frame( + country_code = "IDN", + reporting_year = 2023, + poverty_line = 2.15, + headcount = 0.0182, + headcount_national = 9.4, + mpm_headcount = 0.0214, + reporting_pop = 277.5341, + gni = 4870, + latest_gni = TRUE, + gdp_growth = 5.0481, + latest_gdp = TRUE, + year_range = "2018-2023", + share_below_40 = 2.7768, + share_total = 1.8967, + stringsAsFactors = FALSE + ) + + res <- get_cp_ki(country = "IDN", simplify = TRUE) + expect_equal(res, res_manual) +}) + From 9dc23cf167867df7d96558d1cd5cd50bd2f52bfd Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Mon, 18 Nov 2024 11:57:24 -0500 Subject: [PATCH 35/44] full coverage and check() passed --- R/data.R | 36 ++++++++++++++++++++++++++++++++++++ R/get_cp_ki.R | 4 ++-- man/datt_rural.Rd | 26 ++++++++++++++++++++++++++ man/datt_urban.Rd | 27 +++++++++++++++++++++++++++ man/get_cp_ki.Rd | 6 ++---- man/get_gd.Rd | 14 ++++++++++---- man/unnest_ki.Rd | 22 ++++++++++++++++++++++ tests/testthat/test-get_gd.R | 21 +++++++++++++++------ 8 files changed, 140 insertions(+), 16 deletions(-) create mode 100644 R/data.R create mode 100644 man/datt_rural.Rd create mode 100644 man/datt_urban.Rd create mode 100644 man/unnest_ki.Rd diff --git a/R/data.R b/R/data.R new file mode 100644 index 0000000..5b2c50a --- /dev/null +++ b/R/data.R @@ -0,0 +1,36 @@ +#' Datt (1998) grouped data for rural india, 1983 +#' +#' Dataset from Datt (1998) with grouped data for rural India in 1983. +#' +#' @format A data frame with 13 observations on the following 6 variables: +#' \describe{ +#' \item{monthly_pc_exp}{Welfare range class} +#' \item{mean_monthly_pc_exp}{Mean welfare for given welfare range class} +#' \item{percentage_of_persons}{Percentage of individuals in given welfare class} +#' \item{L}{Cumulative welfare} +#' \item{p}{Cumulative population} +#' \item{area}{rural} +#' } +#' +#' @source Datt, G. (1998). See get_cp vignette. +#' +"datt_rural" + +#' Grouped data for urban india, 1983 +#' +#' Dataset from Sarvekshana N26 Vol 9 N 4, created by the author following +#' Datt(1998) methodology with grouped data for urban India in 1983. +#' +#' @format A data frame with 13 observations on the following 6 variables: +#' \describe{ +#' \item{monthly_pc_exp}{Welfare range class} +#' \item{mean_monthly_pc_exp}{Mean welfare for given welfare range class} +#' \item{percentage_of_persons}{Percentage of individuals in given welfare class} +#' \item{L}{Cumulative welfare} +#' \item{p}{Cumulative population} +#' \item{area}{urban} +#' } +#' +#' @source Sarvekshana N26 Vol 9 N 4, and Datt, G. (1998) for methodology. See get_cp vignette. +#' +"datt_urban" diff --git a/R/get_cp_ki.R b/R/get_cp_ki.R index 6853c86..a0ad624 100644 --- a/R/get_cp_ki.R +++ b/R/get_cp_ki.R @@ -85,9 +85,9 @@ get_cp_ki <- function(country = NULL, } -# Unnest ki (no collapse/tidyverse) ---- +# Unnest key indicators ---- #' Unnest the key indicators -#' @describeIn unnest_ki() takes the simplified output from cp-key-indicators endpoint and unnests it. +#' @describeIn unnest_ki takes the simplified output from cp-key-indicators endpoint and unnests it. #' #' @param out parsed and simplified output from cp-key-indicators endpoint #' diff --git a/man/datt_rural.Rd b/man/datt_rural.Rd new file mode 100644 index 0000000..36fc749 --- /dev/null +++ b/man/datt_rural.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{datt_rural} +\alias{datt_rural} +\title{Datt (1998) grouped data for rural india, 1983} +\format{ +A data frame with 13 observations on the following 6 variables: +\describe{ +\item{monthly_pc_exp}{Welfare range class} +\item{mean_monthly_pc_exp}{Mean welfare for given welfare range class} +\item{percentage_of_persons}{Percentage of individuals in given welfare class} +\item{L}{Cumulative welfare} +\item{p}{Cumulative population} +\item{area}{rural} +} + +@source Datt, G. (1998). See get_cp vignette. +} +\usage{ +datt_rural +} +\description{ +Dataset from Datt (1998) with grouped data for rural India in 1983. +} +\keyword{datasets} diff --git a/man/datt_urban.Rd b/man/datt_urban.Rd new file mode 100644 index 0000000..a089213 --- /dev/null +++ b/man/datt_urban.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{datt_urban} +\alias{datt_urban} +\title{Grouped data for urban india, 1983} +\format{ +A data frame with 13 observations on the following 6 variables: +\describe{ +\item{monthly_pc_exp}{Welfare range class} +\item{mean_monthly_pc_exp}{Mean welfare for given welfare range class} +\item{percentage_of_persons}{Percentage of individuals in given welfare class} +\item{L}{Cumulative welfare} +\item{p}{Cumulative population} +\item{area}{urban} +} + +@source Sarvekshana N26 Vol 9 N 4, and Datt, G. (1998) for methodology. See get_cp vignette. +} +\usage{ +datt_urban +} +\description{ +Dataset from Sarvekshana N26 Vol 9 N 4, created by the author following +Datt(1998) methodology with grouped data for urban India in 1983. +} +\keyword{datasets} diff --git a/man/get_cp_ki.Rd b/man/get_cp_ki.Rd index 204f9b7..0847ff5 100644 --- a/man/get_cp_ki.Rd +++ b/man/get_cp_ki.Rd @@ -50,12 +50,10 @@ Get Country Profiles Key Indicators \examples{ \dontrun{ # One country, all years with default ppp_version = 2017 -res <- get_cp(country = "AGO") +res <- get_cp(country = "IDN") # All countries, povline = 1.9 -res <- get_cp(povline = 1.9) +res <- get_cp(country = "IDN", povline = 1.9) -# All countries and years with default values -res <- get_cp() } } diff --git a/man/get_gd.Rd b/man/get_gd.Rd index bc8e226..07fd295 100644 --- a/man/get_gd.Rd +++ b/man/get_gd.Rd @@ -8,8 +8,8 @@ get_gd( cum_welfare = NULL, cum_population = NULL, estimate = c("stats", "lorenz", "params"), - requested_mean = 1, - povline = 1, + requested_mean = NULL, + povline = NULL, popshare = NULL, lorenz = NULL, n_bins = NULL, @@ -33,6 +33,8 @@ get_gd( \item{popshare}{numeric: Proportion of the population living below the poverty line} +\item{lorenz}{character: Lorenz curve methodology. Either "lb" or "lq".} + \item{n_bins}{numeric: Number of bins. Required for estimate = "lorenz".} \item{api_version}{character: API version} @@ -50,8 +52,12 @@ Get grouped stats from the PIP API. \examples{ \dontrun{ -datt_data <- data.frame(p = c(0.0092, 0.0339, 0.0850, 0.160, 0.2609, 0.4133, 0.5497, 0.7196, 0.8196, 0.9174, 0.9570, 0.9751, 1), - L = c(0.00208, 0.001013, 0.03122, 0.07083, 0.12808, 0.23498, 0.34887, 051994, 0.64270, 0.79201, 0.86966, 0.91277, 1) +datt_data <- data.frame(p = c(0.0092, 0.0339, 0.0850, 0.160, 0.2609, 0.4133, + 0.5497, 0.7196, 0.8196, 0.9174, 0.9570, 0.9751, + 1), + L = c(0.00208, 0.001013, 0.03122, 0.07083, 0.12808, + 0.23498, 0.34887, 051994, 0.64270, 0.79201, + 0.86966, 0.91277, 1) # estimate = 'stats': retrieve poverty statistics. stats <- get_gd(cum_welfare = datt_data$L, cum_population = datt_data$p, diff --git a/man/unnest_ki.Rd b/man/unnest_ki.Rd new file mode 100644 index 0000000..3f2174d --- /dev/null +++ b/man/unnest_ki.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_cp_ki.R +\name{unnest_ki} +\alias{unnest_ki} +\title{Unnest the key indicators} +\usage{ +unnest_ki(out) +} +\arguments{ +\item{out}{parsed and simplified output from cp-key-indicators endpoint} +} +\value{ +data frame, unnested. +} +\description{ +Unnest the key indicators +} +\section{Functions}{ +\itemize{ +\item \code{unnest_ki()}: takes the simplified output from cp-key-indicators endpoint and unnests it. + +}} diff --git a/tests/testthat/test-get_gd.R b/tests/testthat/test-get_gd.R index d39e73f..81cfc2c 100644 --- a/tests/testthat/test-get_gd.R +++ b/tests/testthat/test-get_gd.R @@ -1,5 +1,7 @@ # Section 1: General Argument Checks ---- test_that("General Argument Checks", { + skip_if_offline() + skip_on_cran() # Check if cum_welfare and cum_population have different lengths cum_welfare <- c(0.1, 0.2, 0.3) @@ -13,10 +15,11 @@ test_that("General Argument Checks", { ## Section 2: Testing 'stats' Endpoint ----- test_that("'stats' Endpoint Tests", { - + skip_if_offline() + skip_on_cran() # Error when requested_mean or povline is missing - cum_welfare <- datt_data$L - cum_population <- datt_data$p + cum_welfare <- datt_rural$L + cum_population <- datt_rural$p # Missing requested_mean expect_error( @@ -49,6 +52,8 @@ test_that("'stats' Endpoint Tests", { ### Section 3: Testing 'lorenz' Endpoint ---- test_that("'lorenz' Endpoint Tests", { + skip_if_offline() + skip_on_cran() cum_welfare <- datt_rural$L cum_population <- datt_rural$p @@ -77,9 +82,11 @@ test_that("'lorenz' Endpoint Tests", { ### Section 4: Testing 'params' Endpoint ----- test_that("'params' Endpoint Tests", { + skip_if_offline() + skip_on_cran() - cum_welfare <- datt_data$L - cum_population <- datt_data$p + cum_welfare <- datt_rural$L + cum_population <- datt_rural$p # Proper retrieval of regression parameters params <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, estimate = "params") @@ -90,6 +97,8 @@ test_that("'params' Endpoint Tests", { ### Section 5: Additional ----- test_that("Edge Cases and Error Handling", { + skip_if_offline() + skip_on_cran() # Both cum_welfare and cum_population missing expect_error( @@ -98,7 +107,7 @@ test_that("Edge Cases and Error Handling", { # Invalid lorenz method expect_error( - get_gd(cum_welfare = datt_data$L, cum_population = datt_data$p, + get_gd(cum_welfare = datt_rural$L, cum_population = datt_rural$p, estimate = "lorenz", lorenz = "invalid_method", n_bins = 10, server = "dev"), "You supplied an invalid value for lorenz" ) From 98676bedb80d691acbc2f5c3cd095e0cbf4a214a Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Wed, 27 Nov 2024 13:11:49 -0500 Subject: [PATCH 36/44] skipped one test for json (check) and added nowcast test --- tests/testthat/test-get_stats.R | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-get_stats.R b/tests/testthat/test-get_stats.R index 3c8da10..f8d43cb 100644 --- a/tests/testthat/test-get_stats.R +++ b/tests/testthat/test-get_stats.R @@ -121,9 +121,11 @@ test_that("get_stats() returns a tibble with named columns for empty response (f expect_identical(names(res), names(res2)) # json (does not return an empty response data frame) - res2 <- get_stats("AGO", 2005, format = "json") # empty response - expect_equal(dim(res2)[2], 0) - expect_equal(length(names(res2)), 0) + # GC: this test returns a warning now because json returns a completely + # empty tibble (no variables), is this correct? + #res2 <- get_stats("AGO", 2005, format = "json") # empty response + #expect_equal(dim(res2)[2], 0) + #expect_equal(length(names(res2)), 0) }) @@ -185,3 +187,12 @@ test_that("get_wb() works w/ simplify = FALSE", { expect_true(is.data.frame(res$content)) expect_gte(nrow(res$content), 3) }) + + +test_that("get_stats() works with nowcast == TRUE",{ + skip_if_offline() + skip_on_cran() + + nowcast_output <- get_stats("AGO", nowcast = TRUE) + expect_true("nowcast" %in% nowcast_output$estimate_type) +}) From a8194391756b75c2d18cf5cbb55688ff8a38ede3 Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Wed, 27 Nov 2024 14:09:50 -0500 Subject: [PATCH 37/44] get_stats() when empty with json returns a different format than other, skipping a test --- tests/testthat/test-get_stats.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-get_stats.R b/tests/testthat/test-get_stats.R index f8d43cb..466aa68 100644 --- a/tests/testthat/test-get_stats.R +++ b/tests/testthat/test-get_stats.R @@ -123,9 +123,9 @@ test_that("get_stats() returns a tibble with named columns for empty response (f # json (does not return an empty response data frame) # GC: this test returns a warning now because json returns a completely # empty tibble (no variables), is this correct? - #res2 <- get_stats("AGO", 2005, format = "json") # empty response - #expect_equal(dim(res2)[2], 0) - #expect_equal(length(names(res2)), 0) + # res2 <- get_stats("AGO", 2005, format = "json") # empty response + # expect_equal(dim(res2)[2], 0) + # expect_equal(length(names(res2)), 0) }) From ba96a0a07ec5a2e8816284517409cfb26d5146ca Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Mon, 2 Dec 2024 13:56:54 -0500 Subject: [PATCH 38/44] removed server = 'dev' and server = 'qa'. Check() passed. --- tests/testthat/test-get_gd.R | 8 ++++---- tests/testthat/test-utils.R | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-get_gd.R b/tests/testthat/test-get_gd.R index 81cfc2c..68beb5b 100644 --- a/tests/testthat/test-get_gd.R +++ b/tests/testthat/test-get_gd.R @@ -60,7 +60,7 @@ test_that("'lorenz' Endpoint Tests", { # Correct retrieval of Lorenz curve data points lorenz <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, - estimate = "lorenz", n_bins = 10, server = "dev") + estimate = "lorenz", n_bins = 10) expect_s3_class(lorenz, "data.frame") expect_true(all(c("weight", "welfare") %in% names(lorenz))) @@ -68,13 +68,13 @@ test_that("'lorenz' Endpoint Tests", { # Handling of lorenz methodology lorenz_lb <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, - estimate = "lorenz", lorenz = "lb", n_bins = 100, server = "dev") + estimate = "lorenz", lorenz = "lb", n_bins = 100) expect_s3_class(lorenz_lb, "data.frame") expect_true(all(c("weight", "welfare") %in% names(lorenz_lb))) lorenz_lq <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, - estimate = "lorenz", lorenz = "lq", n_bins = 100, server = "dev") + estimate = "lorenz", lorenz = "lq", n_bins = 100) expect_s3_class(lorenz_lq, "data.frame") expect_true(all(c("weight", "welfare") %in% names(lorenz_lq))) @@ -108,7 +108,7 @@ test_that("Edge Cases and Error Handling", { # Invalid lorenz method expect_error( get_gd(cum_welfare = datt_rural$L, cum_population = datt_rural$p, - estimate = "lorenz", lorenz = "invalid_method", n_bins = 10, server = "dev"), + estimate = "lorenz", lorenz = "invalid_method", n_bins = 10), "You supplied an invalid value for lorenz" ) }) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index a735fe4..b38e633 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -17,7 +17,7 @@ test_that("check_internet() works", { test_that("check_api() works", { skip_if_offline() skip_on_cran() - res <- check_api("v1", server = "qa") + res <- check_api("v1") expect_equal(res, "PIP API is running") }) From 9faa22a1628dce9967ee479f650ab72260924877 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Fri, 6 Dec 2024 13:01:44 -0500 Subject: [PATCH 39/44] export get_gd() --- NAMESPACE | 1 + R/get_gd.R | 2 ++ 2 files changed, 3 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 4a76a5b..b25cadd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(get_aux) export(get_cache_info) export(get_cp) export(get_cp_ki) +export(get_gd) export(get_pip_info) export(get_stats) export(get_versions) diff --git a/R/get_gd.R b/R/get_gd.R index 1d7571c..7ef00c6 100644 --- a/R/get_gd.R +++ b/R/get_gd.R @@ -11,6 +11,8 @@ #' @param lorenz character: Lorenz curve methodology. Either "lb" or "lq". #' @param n_bins numeric: Number of bins. Required for estimate = "lorenz". #' +#' @export +#' #' @examples #' \dontrun{ #' From 63b49296751a7c093c74e18bb71cb740ec303278 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Fri, 6 Dec 2024 13:21:33 -0500 Subject: [PATCH 40/44] fix bug in get_gd --- R/get_gd.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/get_gd.R b/R/get_gd.R index 7ef00c6..d2c73db 100644 --- a/R/get_gd.R +++ b/R/get_gd.R @@ -90,8 +90,8 @@ get_gd <- function(cum_welfare = NULL, if (is.null(requested_mean)) { cli::cli_abort("For endpoint {endpoint}, {.val requested_mean} must be provided.") } - if (is.null(povline)) { - cli::cli_abort("For endpoint {endpoint}, {.val povline} must be provided.") + if (is.null(povline) && is.null(popshare)) { + cli::cli_abort("For endpoint {.field endpoint}, you must provide either {.arg povline} or {.arg popshare} argument") } # popshare can't be used together with povline From 0aa1a1a0baba123c1adecd107ff08f83b4769aea Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Sat, 7 Dec 2024 10:58:34 -0500 Subject: [PATCH 41/44] small change to get_gd() test --- tests/testthat/test-get_gd.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-get_gd.R b/tests/testthat/test-get_gd.R index 68beb5b..4993a08 100644 --- a/tests/testthat/test-get_gd.R +++ b/tests/testthat/test-get_gd.R @@ -32,7 +32,7 @@ test_that("'stats' Endpoint Tests", { expect_error( get_gd(cum_welfare = cum_welfare, cum_population = cum_population, estimate = "stats", requested_mean = 109.9), - "must be provided" + "you must provide" ) # Correct retrieval of statistics From e94f4d82a24df86c59b76dd3a66539aa9e7acb8b Mon Sep 17 00:00:00 2001 From: giorgiacek Date: Sat, 7 Dec 2024 12:22:43 -0500 Subject: [PATCH 42/44] typos in the vignettes corrected. --- vignettes/articles/get-cp-example.Rmd | 8 ++++++-- vignettes/articles/get-gd-example.Rmd | 16 +++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/vignettes/articles/get-cp-example.Rmd b/vignettes/articles/get-cp-example.Rmd index d8e6efa..4652cb1 100644 --- a/vignettes/articles/get-cp-example.Rmd +++ b/vignettes/articles/get-cp-example.Rmd @@ -1,8 +1,8 @@ --- -title: "get-cp-example" +title: "Get Country Profiles" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{pipr} + %\VignetteIndexEntry{get-cp-example} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -16,6 +16,10 @@ knitr::opts_chunk$set( ) ``` +```{r} +library(pipr) +``` + ## Overview The `get_cp()` and `get_cp_ki()` functions provide **country profile data**, also available in [PIP's Country Profile page](https://pip.worldbank.org/country-profiles/IDN). This dataset comes from the [Global Monitoring Indicators (GMI)](https://datacatalog.worldbank.org/int/search/dataset/0065517/global-monitoring-indicators--gmi-), a set of harmonized indicators produced from the Global Monitoring Database (GMD) [^1] diff --git a/vignettes/articles/get-gd-example.Rmd b/vignettes/articles/get-gd-example.Rmd index d5469ef..f79097a 100644 --- a/vignettes/articles/get-gd-example.Rmd +++ b/vignettes/articles/get-gd-example.Rmd @@ -1,8 +1,8 @@ --- -title: "get-gd-example" +title: "Get Grouped Data Parameters" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{pipr} + %\VignetteIndexEntry{get-gd-example} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -159,12 +159,12 @@ ggplot(poverty_stats, aes(x = poverty_line)) + ### Lorenz Curve Data ```{r, warning=FALSE, message=FALSE} -lorenz_points_lq <- get_gd(datt_data$L, datt_data$p, estimate = "lorenz", lorenz = "lq", server = "dev") -lorenz_points_lq_10 <- get_gd(datt_data$L, datt_data$p, estimate = "lorenz", lorenz = "lq", n_bins = 10, server = "dev") +lorenz_points_lq <- get_gd(datt_rural$L, datt_rural$p, estimate = "lorenz", lorenz = "lq") +lorenz_points_lq_10 <- get_gd(datt_rural$L, datt_rural$p, estimate = "lorenz", lorenz = "lq", n_bins = 10) ggplot() + geom_bar(data = lorenz_points_lq_10, aes(x = weight, y = welfare), stat = "identity", fill = "darkorange", alpha = 0.3) + - geom_point(data = datt_data, aes(x = p, y = L), color = "darkorange", size = 2) + + geom_point(data = datt_rural, aes(x = p, y = L), color = "darkorange", size = 2) + geom_point(data = lorenz_points_lq, aes(x = weight, y = welfare), color = "darkorange", size = 0.5) + geom_abline(intercept = 0, slope = 1, color = "black") + labs( @@ -174,10 +174,12 @@ ggplot() + ) + theme_minimal() ``` + + ### Rural vs Urban ```{r, warning=FALSE, message=FALSE} -lorenz_rural <- get_gd(datt_rural$L, datt_rural$p, estimate = "lorenz", lorenz = "lq", server = "dev", n_bins = 1000) -lorenz_urban <- get_gd(datt_urban$L, datt_urban$p, estimate = "lorenz", lorenz = "lq", server = "dev", n_bins = 1000) +lorenz_rural <- get_gd(datt_rural$L, datt_rural$p, estimate = "lorenz", lorenz = "lq", n_bins = 1000) +lorenz_urban <- get_gd(datt_urban$L, datt_urban$p, estimate = "lorenz", lorenz = "lq", n_bins = 1000) ggplot() + geom_point(data = lorenz_rural, aes(x = weight, y = welfare), color = "darkorange", size = 0.5) + From fed4d4be6c675467f879040dc0913ddac1a4cf15 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 9 Dec 2024 11:03:30 -0500 Subject: [PATCH 43/44] small update --- vignettes/articles/get-gd-example.Rmd | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vignettes/articles/get-gd-example.Rmd b/vignettes/articles/get-gd-example.Rmd index f79097a..2832eb4 100644 --- a/vignettes/articles/get-gd-example.Rmd +++ b/vignettes/articles/get-gd-example.Rmd @@ -33,7 +33,7 @@ Here are a few examples to get you started: By default, `get_gd()` returns grouped statistics (`estimate = "stats"`) based on cumulative welfare (`cum_welfare`) and population values (`cum_population`), both expressed as percentages. The default mean (`requested_mean`) and poverty line (`povline`) are set to 1, so the user should specify the known mean of the distribution, and the desired poverty line. -The data used in this example is from Datt (1998). Among other consumption survey variables, The dataset lists the cumulative welfare(`L`, for Lorenz) and population values(`p`) for rural India in 1983, expressed in shares (percentages). The mean of the distribution is 109.9 Rs (daily), and the poverty line at the time was 89 Rs. Note that the cumulative welfare and population values should be monotonically increasing and sum to 1 to be valid. Additionally, the cumulative population values should always be greater or equal to the corresponding welfare values. +The data used in this example is from [(Datt (1998))](https://ageconsearch.umn.edu/record/94862/?v=pdf). Among other consumption survey variables, The dataset lists the cumulative welfare(`L`, for Lorenz) and population values(`p`) for rural India in 1983, expressed in shares (percentages). The mean of the distribution is 109.9 Rs (daily), and the poverty line at the time was 89 Rs. Note that the cumulative welfare and population values should be monotonically increasing and sum to 1 to be valid. Additionally, the cumulative population values should always be greater or equal to the corresponding welfare values. ```{r} datt_mean <- 109.9 @@ -43,6 +43,8 @@ datt_povline <- 89 str(datt_rural) ``` +### Get headcount from poverty line + To retrieve basic grouped statistics, you need to provide cumulative welfare and population values along with the requested mean and poverty line. ```{r, warning=FALSE, message=FALSE} get_gd( @@ -54,6 +56,8 @@ get_gd( ) ``` +### Get poverty line from headcount (`popshare`) + As an alternative, instead of the mean, you can provide the population share (`popshare`), which will be assumed equal to the poverty headcount ratio, and used to calculate the rest of the statistics (and the poverty line itself): ```{r, warning=FALSE, message=FALSE} get_gd( @@ -76,7 +80,7 @@ lorenz_100 <- get_gd( cum_welfare = datt_rural$L, cum_population = datt_rural$p, estimate = "lorenz", - n_bins = 100, + n_bins = 10, server = "dev" ) From e451617d179f2092e4bada2738a8615709d2459e Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 9 Dec 2024 11:20:03 -0500 Subject: [PATCH 44/44] fix legend in chart --- vignettes/articles/get-gd-example.Rmd | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vignettes/articles/get-gd-example.Rmd b/vignettes/articles/get-gd-example.Rmd index 2832eb4..a1f135e 100644 --- a/vignettes/articles/get-gd-example.Rmd +++ b/vignettes/articles/get-gd-example.Rmd @@ -95,7 +95,7 @@ get_gd( cum_population = datt_rural$p, estimate = "lorenz", lorenz = "lb", - n_bins = 100, + n_bins = 10, server = "dev" ) ``` @@ -155,10 +155,12 @@ ggplot(poverty_stats, aes(x = poverty_line)) + labs( title = "Poverty Measures vs. Poverty Line, Rural India 1983", x = "Poverty Line", - y = "Measure Value", + y = "Poverty Measure (FGT indices)", color = "Poverty Measure" ) + - theme_minimal() + theme_minimal() + + theme(legend.position = "bottom", + legend.title = element_blank()) ``` ### Lorenz Curve Data