diff --git a/DESCRIPTION b/DESCRIPTION index a577330..cebea23 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: pipr Title: Client for the Poverty and Inequality Platform ('PIP') API -Version: 1.0.0 +Version: 1.1.0 Authors@R: c(person(given = "Tony", family = "Fujs", @@ -28,7 +28,7 @@ License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 URL: https://worldbank.github.io/pipr/, https://github.com/worldbank/pipr, @@ -41,7 +41,6 @@ Suggests: rmarkdown, markdown, callr, - mockery, ggplot2, tidyr, ggthemes, @@ -50,20 +49,20 @@ Suggests: dplyr, readr Language: en-US -Imports: +Imports: + arrow, attempt, curl, - httr, jsonlite, tibble, purrr, - memoise, - cachem, - data.table, cli, rlang, - utils + utils, + httr2, + stringr, + vroom Depends: - R (>= 3.6.0) + R (>= 4.1.0) Config/testthat/edition: 3 Date: 2023-04-28 diff --git a/NAMESPACE b/NAMESPACE index 9a730e2..b25cadd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,15 @@ # Generated by roxygen2: do not edit by hand export(call_aux) +export(change_grouped_stats_to_csv) export(check_api) +export(delete_cache) export(display_aux) export(get_aux) +export(get_cache_info) +export(get_cp) +export(get_cp_ki) +export(get_gd) export(get_pip_info) export(get_stats) export(get_versions) diff --git a/NEWS.md b/NEWS.md index a3798ea..901afe3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +# pipr 1.1.0 + +* [Use httr2](https://github.com/worldbank/pipr/pull/70) + * API responses are now cached locally according to the PIP API cache policy + from the PIP API responses headers + * `pipr` automatically handles retries when hitting the PIP API rate limiting + threshold + * Improved translation of HTTP errors into R error messages +* New helper functions `delete_cache()` and `get_cache_info()` + # pipr 1.0.0 * [Mock live API calls or skip them on CRAN](https://github.com/worldbank/pipr/pull/45) diff --git a/R/aaa.R b/R/aaa.R index 84d15a3..8c85037 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -1,23 +1,24 @@ -.pip <- new.env(parent = emptyenv()) +.pip <- new.env(parent = emptyenv()) # PR 63 +.pipcache <- new.env(parent = emptyenv()) # PR 63 #' Set auxiliary table in .pip environment for later call #' #' @param table character: name of the table in .pip env #' @param value data to be saved -#' @inheritParams get_aux +#' @param replace logical. #' #' @return Invisible TRUE if set correctly. FALSE otherwise #' @keywords internal set_aux <- function(table, value, - force = FALSE) { + replace = FALSE) { # PR 63 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## Evaluate if exists -------- to_set <- 1 if (rlang::env_has(.pip, table)) { - if (force == FALSE) { + if (isFALSE(replace)) { # PR 63 cli::cli_alert("Table {.field {table}} already exists.") to_set <- utils::menu(c("Replace with new table", "Abort")) } @@ -65,12 +66,12 @@ set_aux <- function(table, #' @examples #' # call one table #' -#' get_aux("gdp", assign_tb = TRUE, force = TRUE) +#' get_aux("gdp", assign_tb = TRUE, replace = TRUE) # PR 63 #' call_aux("gdp") #' #' # see the name of several tables in memory #' tb <- c("cpi", "ppp", "pop") -#' lapply(tb, get_aux, assign_tb = TRUE, force = TRUE) +#' lapply(tb, get_aux, assign_tb = TRUE, replace = TRUE) # PR 63 #' call_aux() call_aux <- function(table = NULL) { @@ -119,7 +120,9 @@ call_aux <- function(table = NULL) { return(rlang::env_get(.pip, table)) } else { msg <- c("*" = "Table {.field {table}} does not exist") - cli::cli_abort(msg, wrap = TRUE) + cli::cli_abort(msg, + + wrap = TRUE) } } diff --git a/R/build_request.R b/R/build_request.R new file mode 100644 index 0000000..6570751 --- /dev/null +++ b/R/build_request.R @@ -0,0 +1,88 @@ +#' Build request version 2 +#' +#' @param server character: Server. For WB internal use only +#' @param api_version character: API version +#' @param endpoint character: PIP API endpoint +#' @param ... other parameters +#' +#' @return httr2 request +#' +build_request <- function(server, + api_version, + endpoint, + ...) { + + + + base_url <- select_base_url(server = server) + params <- list(...) + + req <- httr2::request(base_url) |> + httr2::req_url_path_append(api_version) |> + httr2::req_url_path_append(endpoint) |> + # .multi = "comma" works fine without applying fix_params + httr2::req_url_query(!!!params, .multi = "comma") |> + httr2::req_cache(tools::R_user_dir("pipr", which = "cache"), + use_on_error = TRUE, + debug = TRUE) |> + httr2::req_user_agent(pipr_user_agent) |> + httr2::req_error(body = parse_error_body) |> + httr2::req_retry( + is_transient = pip_is_transient, + after = retry_after, + max_seconds = 60 + ) + + + return(req) + +} + + + + +#' build_request, OLD version +#' +#' @param server character: Server. For WB internal use only +#' @param api_version character: API version +#' @param endpoint character: PIP API endpoint +#' @param ... other parameters +#' +#' @return httr2 request +#' +build_request_old <- function(server, + api_version, + endpoint, + ...) { + + base_url <- select_base_url(server = server) + + params <- list(...) + params <- lapply(params, fix_params) + + req <- httr2::request(base_url) |> + httr2::req_url_path_append(api_version) |> + httr2::req_url_path_append(endpoint) |> + httr2::req_url_query(!!!params) |> + httr2::req_cache(tools::R_user_dir("pipr", which = "cache"), + use_on_error = TRUE, + debug = TRUE) |> + httr2::req_user_agent(pipr_user_agent) |> + httr2::req_error(body = parse_error_body) |> + httr2::req_retry( + is_transient = pip_is_transient, + after = retry_after, + max_seconds = 60 + ) + + return(req) + +} + +fix_params <- function(param) { + if (length(param) > 1) { + return(paste(param, collapse = ",")) + } else { + return(param) + } +} diff --git a/R/data.R b/R/data.R new file mode 100644 index 0000000..5b2c50a --- /dev/null +++ b/R/data.R @@ -0,0 +1,36 @@ +#' Datt (1998) grouped data for rural india, 1983 +#' +#' Dataset from Datt (1998) with grouped data for rural India in 1983. +#' +#' @format A data frame with 13 observations on the following 6 variables: +#' \describe{ +#' \item{monthly_pc_exp}{Welfare range class} +#' \item{mean_monthly_pc_exp}{Mean welfare for given welfare range class} +#' \item{percentage_of_persons}{Percentage of individuals in given welfare class} +#' \item{L}{Cumulative welfare} +#' \item{p}{Cumulative population} +#' \item{area}{rural} +#' } +#' +#' @source Datt, G. (1998). See get_cp vignette. +#' +"datt_rural" + +#' Grouped data for urban india, 1983 +#' +#' Dataset from Sarvekshana N26 Vol 9 N 4, created by the author following +#' Datt(1998) methodology with grouped data for urban India in 1983. +#' +#' @format A data frame with 13 observations on the following 6 variables: +#' \describe{ +#' \item{monthly_pc_exp}{Welfare range class} +#' \item{mean_monthly_pc_exp}{Mean welfare for given welfare range class} +#' \item{percentage_of_persons}{Percentage of individuals in given welfare class} +#' \item{L}{Cumulative welfare} +#' \item{p}{Cumulative population} +#' \item{area}{urban} +#' } +#' +#' @source Sarvekshana N26 Vol 9 N 4, and Datt, G. (1998) for methodology. See get_cp vignette. +#' +"datt_urban" diff --git a/R/display_aux.R b/R/display_aux.R index d8e9105..59e288e 100644 --- a/R/display_aux.R +++ b/R/display_aux.R @@ -31,9 +31,12 @@ display_aux <- function(version = NULL, # ____________________________________________________________________________ # Build query string #### + req <- build_request(server = server, + api_version = api_version, + endpoint = "aux") + res <- req |> + httr2::req_perform() - u <- build_url(server, "aux", api_version = api_version) - res <- httr::GET(u) tbs_tb <- parse_response(res, simplify = simplify) tbs <- tbs_tb[["tables"]] if (isTRUE(run_cli)) { diff --git a/R/get_aux.R b/R/get_aux.R index ae70c65..e792254 100644 --- a/R/get_aux.R +++ b/R/get_aux.R @@ -13,7 +13,8 @@ #' assigned to exactly the same name as the one of the desired table. If #' character, the table will be assigned to that name. #' @inheritParams get_stats -#' @param force logical: force replacement. Default is FALSE +#' @param replace logical: force replacement of aux files in `.pip` env. Default +#' is FALSE. #' #' @return If `simplify = FALSE`, it returns a list of class "pip_api". If #' `simplify = TRUE`, it returns a tibble with the requested data. This is the @@ -54,19 +55,22 @@ get_aux <- function(table = NULL, simplify = TRUE, server = NULL, assign_tb = FALSE, - force = FALSE) { + replace = FALSE) { # Match args api_version <- match.arg(api_version) format <- match.arg(format) run_cli <- run_cli() # Build query string - u <- build_url(server, "aux", api_version = api_version) + req <- build_request(server = server, + api_version = api_version, + endpoint = "aux") # Return response # If no table is specified, returns list of available tables if (is.null(table)) { - res <- httr::GET(u) + res <- req |> + httr2::req_perform() tables <- parse_response(res, simplify = simplify) cli::cli_text("Auxiliary tables available are") cli::cli_ul(tables$tables) @@ -80,12 +84,15 @@ get_aux <- function(table = NULL, return(invisible(tables)) # If a table is specified, returns that table } else { - args <- build_args(.table = table, - .version = version, - .ppp_version = ppp_version, - .release_version = release_version, - .format = format) - res <- httr::GET(u, query = args, httr::user_agent(pipr_user_agent)) + req <- build_request(server = server, + api_version = api_version, + endpoint = "aux", + table = table, + version = version, + release_version = release_version, + format = format) + + res <- httr2::req_perform(req) rt <- parse_response(res, simplify = simplify) } @@ -107,7 +114,7 @@ get_aux <- function(table = NULL, srt <- set_aux(table = tb_name, value = rt, - force = force) + replace = replace) if (isTRUE(srt)) { diff --git a/R/get_cp.R b/R/get_cp.R new file mode 100644 index 0000000..356eaab --- /dev/null +++ b/R/get_cp.R @@ -0,0 +1,70 @@ +#' Get Country Profiles +#' +#' @inheritParams get_stats +#' +#' @return If `simplify = FALSE`, it returns a list of class "pip_api". If +#' `simplify = TRUE`, it returns a tibble with the requested data. This is the +#' default. Only for `get_aux()`, If `assign_tb = TRUE` or character, it +#' returns TRUE when data was assign properly to .pip env. FALSE, if it was +#' not assigned. +#' @export +#' +#' @examples +#' \dontrun{ +#' # One country, all years with default ppp_version = 2017 +#' res <- get_cp(country = "AGO") +#' +#' # All countries, povline = 1.9 +#' res <- get_cp(povline = 1.9) +#' +#' # All countries and years with default values +#' res <- get_cp() +#' } +get_cp <- function(country = "all", + povline = 2.15, # GC: default value like Stata + version = NULL, + ppp_version = 2017, # GC: default value like Stata + release_version = NULL, + api_version = "v1", + format = c("arrow", "rds", "json", "csv"), + simplify = TRUE, + server = NULL) { + + + # 0. Match args ---- + api_version <- match.arg(api_version) + format <- match.arg(format) + + # 1. povline set-up ---- + # (GC: stata equivalent but no 2005 and default to 2.15) + if (is.null(povline)) { + if (ppp_version == "2011") { + povline <- 1.9 + } + } + + + # 2. Build query string ---- + req <- build_request( + country = country, + povline = povline, + version = version, + ppp_version = ppp_version, + release_version = release_version, + format = format, + server = server, + api_version = api_version, + endpoint = "cp-download" + ) + + + # 3. Perform request ---- + res <- req |> + httr2::req_perform() + + # 4. Parse result and return + out <- parse_response(res, simplify) + + return(out) + +} diff --git a/R/get_cp_ki.R b/R/get_cp_ki.R new file mode 100644 index 0000000..a0ad624 --- /dev/null +++ b/R/get_cp_ki.R @@ -0,0 +1,125 @@ +#' Get Country Profiles Key Indicators +#' +#' @inheritParams get_stats +#' +#' @return If `simplify = FALSE`, it returns a list of class "pip_api". If +#' `simplify = TRUE`, it returns a tibble with the requested data. This is the +#' default. Only for `get_aux()`, If `assign_tb = TRUE` or character, it +#' returns TRUE when data was assign properly to .pip env. FALSE, if it was +#' not assigned. +#' @export +#' +#' @examples +#' \dontrun{ +#' # One country, all years with default ppp_version = 2017 +#' res <- get_cp(country = "IDN") +#' +#' # All countries, povline = 1.9 +#' res <- get_cp(country = "IDN", povline = 1.9) +#' +#' } +get_cp_ki <- function(country = NULL, + povline = 2.15, # GC: default value like Stata + version = NULL, + ppp_version = 2017, # GC: default value like Stata + release_version = NULL, + api_version = "v1", + format = c("arrow", "rds", "json", "csv"), + simplify = TRUE, + server = NULL) { + + + # 0. Match args ---- + api_version <- match.arg(api_version) + format <- match.arg(format) + + # 1. povline set-up ---- + # (GC: stata equivalent but no 2005 and default to 2.15) + if (is.null(povline)) { + if (ppp_version == "2011") { + povline <- 1.9 + } + } + + # 2. country set-up ---- + if (is.null(country)) { + cli::cli_abort("Please provide a country code.") + } + + if (length(country) > 1) { + cli::cli_abort("Please provide only one country code.") + } + + + # 2. Build query string ---- + req <- build_request( + country = country, + povline = povline, + version = version, + ppp_version = ppp_version, + release_version = release_version, + format = format, + server = server, + api_version = api_version, + endpoint = "cp-key-indicators" + ) + + + # 3. Perform request ---- + res <- req |> + httr2::req_perform() + + # 4. Parse result and return (if simplify == FALSE) + if (isFALSE(simplify)) { + out <- parse_response(res, simplify) + } else { + out <- parse_response(res, simplify) + + # 5. Unnest + out <- unnest_ki(out) + + } + + return(out) + +} + + +# Unnest key indicators ---- +#' Unnest the key indicators +#' @describeIn unnest_ki takes the simplified output from cp-key-indicators endpoint and unnests it. +#' +#' @param out parsed and simplified output from cp-key-indicators endpoint +#' +#' @return data frame, unnested. +#' +unnest_ki <- function(out){ + + # Step 1: Extract all data frames into individual variables + headcount <- out$headcount[[1]] + headcount_national <- out$headcount_national[[1]] + mpm_headcount <- out$mpm_headcount[[1]] + pop <- out$pop[[1]] + + gni <- out$gni[[1]] + gni <- gni[!duplicated(gni[c("country_code", "reporting_year")]), ] + + + gdp_growth <- out$gdp_growth[[1]] + gdp_growth <- gdp_growth[!duplicated(gdp_growth[c("country_code", "reporting_year")]), ] + + shared_prosperity <- out$shared_prosperity[[1]] + + # Step 2: Merge data frames on common columns + df_merged <- merge(headcount, headcount_national, by = c("country_code", "reporting_year"), all = TRUE) + df_merged <- merge(df_merged, mpm_headcount, by = c("country_code", "reporting_year"), all = TRUE) + df_merged <- merge(df_merged, pop, by = c("country_code", "reporting_year"), all = TRUE) + df_merged <- merge(df_merged, gni, by = c("country_code", "reporting_year"), all = TRUE) + df_merged <- merge(df_merged, gdp_growth, by = c("country_code", "reporting_year"), all = TRUE, suffixes = c("_gni", "_gdp")) + final_df <- merge(df_merged, shared_prosperity, by = "country_code", all = TRUE) + + # Step 3: return + return(final_df) + +} + diff --git a/R/get_gd.R b/R/get_gd.R new file mode 100644 index 0000000..d2c73db --- /dev/null +++ b/R/get_gd.R @@ -0,0 +1,174 @@ + +#' Get grouped stats +#' +#' Get grouped stats from the PIP API. +#' @inheritParams get_stats +#' @param cum_welfare numeric: Cumulative welfare values, expressed in shares. Any length. They should be monotonically increasing, and sum to 1. +#' @param cum_population numeric: Cumulative population values, expressed in shares. Any length. They should be monotonically increasing, and sum to 1. +#' @param estimate character: One of "stats", "lorenz", "params". +#' @param requested_mean numeric: Requested mean. +#' @param povline numeric: Poverty line. Required for estimate = "stats". +#' @param lorenz character: Lorenz curve methodology. Either "lb" or "lq". +#' @param n_bins numeric: Number of bins. Required for estimate = "lorenz". +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' +#' datt_data <- data.frame(p = c(0.0092, 0.0339, 0.0850, 0.160, 0.2609, 0.4133, +#' 0.5497, 0.7196, 0.8196, 0.9174, 0.9570, 0.9751, +#' 1), +#' L = c(0.00208, 0.001013, 0.03122, 0.07083, 0.12808, +#' 0.23498, 0.34887, 051994, 0.64270, 0.79201, +#' 0.86966, 0.91277, 1) +#' +#' # estimate = 'stats': retrieve poverty statistics. +#' stats <- get_gd(cum_welfare = datt_data$L, cum_population = datt_data$p, +#' estimate = "stats", +#' requested_mean = 19, # default is 1. +#' povline = 2.15) # default is 1. +#' +#' # estimate = 'lorenz': retrieve Lorenz curve data points for a specified number of bins. +#' +#' ## Best lorenz curve methodolody selected by default: +#' lorenz <- get_gd(cum_welfare = datt_data$L, +#' cum_population = datt_data$p, +#' estimate = "lorenz", +#' n_bins = 100) # must be specified, default is NULL. +#' +#' ## Specify lorenz curve methodology: +#' ### Beta Lorenz ("lb") +#' lorenz_lb <- get_gd(cum_welfare = datt_data$L, +#' cum_population = datt_data$p, +#' estimate = "lorenz", +#' lorenz = "lb", +#' n_bins = 100) +#' +#' ### Quadratic Lorenz ("lq") +#' lorenz_lq <- get_gd(cum_welfare = datt_data$L, +#' cum_population = datt_data$p, +#' estimate = "lorenz", +#' lorenz = "lq", +#' n_bins = 100) +#' +#' # estimate = 'params': retrieve regression parameters used for the lorenz curve estimation. +#' params <- get_gd(cum_welfare = datt_data$L, +#' cum_population = datt_data$p, +#' estimate = "params") +#'} + +get_gd <- function(cum_welfare = NULL, + cum_population = NULL, + estimate = c("stats", "lorenz", "params"), + requested_mean = NULL, # stats specific. + povline = NULL, # stats specific. + popshare = NULL, # stats specific. + lorenz = NULL, # lorenz specific. + n_bins = NULL, # lorenz specific. + api_version = "v1", + format = c("rds", "json", "csv"), + simplify = TRUE, + server = NULL) { + + # 0. Match args ------------------------------------------------------------- + estimate <- match.arg(estimate) + api_version <- match.arg(api_version) + format <- match.arg(format) + + # 0. General Args checks (Error is general otherwise) ----------------------- + if (length(cum_welfare) != length(cum_population)) { + cli::cli_abort("{.val cum_welfare} and {.val cum_population} must have the same length.") + } + + # 1. endpoint = grouped-stats ------------------------------------------------ + if (estimate == "stats") { + + endpoint <- "grouped-stats" + + # 1.1 grouped-stats args checks ------ + if (is.null(requested_mean)) { + cli::cli_abort("For endpoint {endpoint}, {.val requested_mean} must be provided.") + } + if (is.null(povline) && is.null(popshare)) { + cli::cli_abort("For endpoint {.field endpoint}, you must provide either {.arg povline} or {.arg popshare} argument") + } + + # popshare can't be used together with povline + if (!is.null(popshare)) povline <- NULL + + # 1.2 Build request for grouped-stats ------ + req <- build_request( + cum_welfare = cum_welfare, + cum_population = cum_population, + requested_mean = requested_mean, + povline = povline, + popshare = popshare, + format = format, + server = server, + api_version = api_version, + endpoint = endpoint + ) + + # 1.3 Perform request ------ + res <- req |> + httr2::req_perform() + + # 1.4 Parse result ------ + out <- parse_response(res, simplify) + + } + + # 2. endpoint = lorenz-curve ------------------------------------------------ + if (estimate == "lorenz") { + + endpoint <- "lorenz-curve" + + # 2.1 Build request for lorenz-curve ------ + req <- build_request( + cum_welfare = cum_welfare, + cum_population = cum_population, + lorenz = lorenz, + n_bins = n_bins, + format = format, + server = server, + api_version = api_version, + endpoint = endpoint + ) + + # 2.2 Perform request ------ + res <- req |> + httr2::req_perform() + + # 2.3 Parse result ------ + out <- parse_response(res, simplify) + + } + + # 3. endpoint = regress-params ----------------------------------------------- + if (estimate == "params") { + + endpoint <- "regression-params" + + # 3.2 Build request for regress-params + req <- build_request( + cum_welfare = cum_welfare, + cum_population = cum_population, + format = format, + server = server, + api_version = api_version, + endpoint = endpoint + ) + + # 3.3 Perform request ------ + res <- req |> + httr2::req_perform() + + # 3.4 Parse result ------ + out <- parse_response(res, simplify) + + } + + return(out) + +} diff --git a/R/get_stats.R b/R/get_stats.R index f350cf3..afde51e 100644 --- a/R/get_stats.R +++ b/R/get_stats.R @@ -8,6 +8,7 @@ #' poverty line #' @param fill_gaps logical: If TRUE, will interpolate / extrapolate values for #' missing years +#' @param nowcast logical: If TRUE, will return nowcast estimates. #' @param subgroup character: If used result will be aggregated for predefined #' sub-groups. Either 'wb_regions' or 'none'. #' @param welfare_type character: Welfare type either of c("all", "income", "consumption") @@ -62,6 +63,7 @@ get_stats <- function(country = "all", povline = NULL, popshare = NULL, fill_gaps = FALSE, + nowcast = FALSE, subgroup = NULL, welfare_type = c("all", "income", "consumption"), reporting_level = c("all", "national", "urban", "rural"), @@ -69,7 +71,7 @@ get_stats <- function(country = "all", ppp_version = NULL, release_version = NULL, api_version = "v1", - format = c("rds", "json", "csv"), + format = c("arrow", "rds", "json", "csv"), simplify = TRUE, server = NULL) { # Match args @@ -77,11 +79,20 @@ get_stats <- function(country = "all", reporting_level <- match.arg(reporting_level) api_version <- match.arg(api_version) format <- match.arg(format) + # popshare can't be used together with povline if (!is.null(popshare)) povline <- NULL + # nowcast = TRUE -> fill_gaps = TRUE + if (nowcast) fill_gaps <- TRUE + + # otherwise we cannot filter correctly because estimate_type not returned + if (isFALSE(fill_gaps)) nowcast <- FALSE + + # subgroup can't be used together with fill_gaps if (!is.null(subgroup)) { fill_gaps <- NULL # subgroup can't be used together with fill_gaps + nowcast <- NULL # assuming this is the same for nowcast endpoint <- "pip-grp" subgroup <- match.arg(subgroup, c("none", "wb_regions")) if (subgroup == "wb_regions") { @@ -94,28 +105,40 @@ get_stats <- function(country = "all", group_by <- NULL } + # Build query string - args <- build_args( - .country = country, - .year = year, - .povline = povline, - .popshare = popshare, - .fill_gaps = fill_gaps, - .group_by = group_by, - .welfare_type = welfare_type, - .reporting_level = reporting_level, - .version = version, - .ppp_version = ppp_version, - .release_version = release_version, - .format = format + req <- build_request( + country = country, + year = year, + povline = povline, + popshare = popshare, + fill_gaps = fill_gaps, + nowcast = nowcast, + group_by = group_by, + welfare_type = welfare_type, + reporting_level = reporting_level, + version = version, + ppp_version = ppp_version, + release_version = release_version, + format = format, + server = server, + api_version = api_version, + endpoint = endpoint ) - u <- build_url(server, endpoint, api_version) - # Send query - res <- httr::GET(u, query = args, httr::user_agent(pipr_user_agent)) + + # Perform request + res <- req |> + httr2::req_perform() # Parse result out <- parse_response(res, simplify) + # Filter nowcast + ## (only when simplify == TRUE) because filtering happens after the request is returned. + if ( !is.null(nowcast) & isFALSE(nowcast) & simplify == TRUE) { + out <- out[!grepl("nowcast", out$estimate_type),] + } + return(out) } @@ -136,20 +159,21 @@ get_wb <- function(year = "all", format <- match.arg(format) # Build query string - args <- build_args( - .country = "all", - .year = year, - .povline = povline, - .group_by = "wb", - .version = version, - .ppp_version = ppp_version, - .release_version = release_version, - .format = format + req <- build_request( + year = year, + povline = povline, + group_by = "wb", + version = version, + ppp_version = ppp_version, + release_version = release_version, + format = format, + server = server, + api_version = api_version, + endpoint = "pip-grp" ) - u <- build_url(server, "pip-grp", api_version) - - # Send query - res <- httr::GET(u, query = args, httr::user_agent(pipr_user_agent)) + # Perform request + res <- req |> + httr2::req_perform() # Parse result out <- parse_response(res, simplify) diff --git a/R/other.R b/R/other.R index ba9d3e6..a4952db 100644 --- a/R/other.R +++ b/R/other.R @@ -24,8 +24,10 @@ check_api <- function(api_version = "v1", server = NULL) { #' } get_versions <- function(api_version = "v1", server = NULL, simplify = TRUE) { check_internet() - u <- build_url(server, "versions", api_version) - res <- httr::GET(u, httr::user_agent(pipr_user_agent)) + req <- build_request(server = server, + api_version = api_version, + endpoint = "versions") + res <- httr2::req_perform(req) parse_response(res, simplify = simplify) } @@ -41,7 +43,9 @@ get_versions <- function(api_version = "v1", server = NULL, simplify = TRUE) { #' } get_pip_info <- function(api_version = "v1", server = NULL) { check_internet() - u <- build_url(server, "pip-info", api_version) - res <- httr::GET(u, httr::user_agent(pipr_user_agent)) + req <- build_request(server = server, + api_version = api_version, + endpoint = "pip-info") + res <- httr2::req_perform(req) parse_response(res, simplify = FALSE)$content } diff --git a/R/utils.R b/R/utils.R index 6df7e68..670d5ad 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,8 @@ +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Parsing and checking functions ------------- +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# PR 63 + #' check_internet #' @noRd check_internet <- function() { @@ -8,11 +13,13 @@ check_internet <- function() { #' health_check #' @inheritParams check_api #' @noRd -health_check <- function(api_version, server = NULL) { - u <- build_url(server, "health-check", api_version) - res <- httr::GET(u) +health_check <- function(api_version = "v1", server = NULL) { + req <- build_request(server = server, + api_version = api_version, + endpoint = "health-check") + res <- httr2::req_perform(req) attempt::stop_if_not( - .x = httr::status_code(res), + .x = httr2::resp_status(res), .p = ~ .x == 200, msg = "Could not connect to the API" ) @@ -23,22 +30,15 @@ health_check <- function(api_version, server = NULL) { #' @param res A httr response #' @param parsed A parsed response #' @noRd -check_status <- function(res, parsed) { - if (res$status_code != 200) { - if ("error" %in% names(parsed)) { +check_status <- function(res) { + if (httr2::resp_is_error(res)) { msg1 <- paste( - httr::http_status(res$status_code)$message, - parsed$error, + httr2::resp_status_desc(res), "Use simplify = FALSE to see the full error response.", sep = "\n*\t") - } else { - msg1 <- paste( - httr::http_status(res$status_code)$message, - "Use simplify = FALSE to see the full error response.", - sep = "\n*\t") - } + attempt::stop_if_not( - .x = httr::status_code(res), + .x = httr2::resp_status(res), .p = ~ .x == 200, msg = msg1 ) @@ -46,13 +46,13 @@ check_status <- function(res, parsed) { invisible(TRUE) } -#' build_url +#' build_base_url #' @param server character: Server #' @param endpoint character: Endpoint #' @param api_version character: API version #' @inheritParams get_stats #' @noRd -build_url <- function(server, endpoint, api_version) { +build_base_url <- function(server, endpoint, api_version) { base_url <- select_base_url(server = server) sprintf("%s/%s/%s", base_url, api_version, endpoint) } @@ -112,24 +112,50 @@ build_args <- function(.country = NULL, #' @keywords internal parse_response <- function(res, simplify) { + # Classify the response url + res_health <- FALSE + if (grepl("health-check", res$url)) { + res_health <- TRUE + } + + pip_info <- FALSE + if (grepl("pip-info", res$url)) { + pip_info <- TRUE + } + # Get response type - type <- tryCatch(suppressWarnings(httr::http_type(res)), error = function(e) NULL) + type <- tryCatch(suppressWarnings(httr2::resp_content_type(res)), error = function(e) NULL) # Stop if response type is unknown attempt::stop_if(is.null(type), msg = "Invalid response format") + if (type == "application/vnd.apache.arrow.file") { + parsed <- arrow::read_feather(res$body) + # GC: right now arrow not working with grouped-stats, so I won't pivot. + } + if (type == "application/json") { - parsed <- jsonlite::fromJSON(httr::content(res, "text", encoding = "UTF-8")) + + if (res_health | pip_info) { + parsed <- jsonlite::fromJSON(httr2::resp_body_string(res, encoding = "UTF-8")) + } else { + parsed <- jsonlite::fromJSON(httr2::resp_body_string(res, encoding = "UTF-8")) + parsed <- change_grouped_stats_to_csv(parsed) # GC: used to pivot. + } } + if (type == "text/csv") { - parsed <- suppressMessages(httr::content(res, encoding = "UTF-8")) + parsed <- suppressMessages(vroom::vroom( + I(httr2::resp_body_string(res, encoding = "UTF-8"))) + ) } if (type == "application/rds") { - parsed <- unserialize(res$content) + parsed <- unserialize(res$body) + parsed <- change_grouped_stats_to_csv(parsed) # GC: used to pivot. } if (simplify) { - check_status(res, parsed) + httr2::resp_check_status(res, info = parsed$message) parsed <- tibble::as_tibble(parsed) # TEMP fix for renaming of columns # To be removed when pipapi#207 @@ -178,18 +204,213 @@ select_base_url <- function(server) { return(base_url) } +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Formatting functions ------------- +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +# PR 63 +#' rename columns in dataframe +#' +#' @param df data frame +#' @param oldnames character: old names +#' @param newnames character: new names +#' +#' @return data frame with new names +#' @keywords internal +rename_cols <- function(df, oldnames, newnames) { + + # _______________________________________ + # Defenses #### + stopifnot( exprs = { + is.data.frame(df) + length(oldnames) == length(newnames) + # all(oldnames %in% names(df)) + } + ) + + # ___________________________________________ + # Computations #### + df_names <- names(df) + + old_position <- which(oldnames %in% df_names) + old_available <- oldnames[old_position] + new_available <- newnames[old_position] + + tochange <- vector(length = length(old_available)) + + for (i in seq_along(old_available)) { + tochange[i] <- which(df_names %in% old_available[i]) + } + + names(df)[tochange] <- new_available + + + # ____________________________________________ + # Return #### + return(df) + +} + #' Rename columns #' TEMP function to rename response cols #' @param df A data.frame #' @param url response url #' @noRd tmp_rename_cols <- function(df, url = "") { - df <- data.table::setnames( - df, - old = c("survey_year", "reporting_year", "reporting_pop", "reporting_gdp", "reporting_pce", "pce_data_level"), - new = c("welfare_time", "year", "pop", "gdp", "hfce", "hfce_data_level"), - skip_absent = TRUE - ) + # PR 63 + oldnames = c( + "survey_year", + "reporting_year", + "reporting_pop", + "reporting_gdp", + "reporting_pce", + "pce_data_level" + ) - return(df) + newnames = c("welfare_time", + "year", + "pop", + "gdp", + "hfce", + "hfce_data_level") + + rename_cols(df,oldnames, newnames) +} + + +#' pip_is_transient +#' +#' Helper function to determine if an error is due to the number of requests +#' going over the rate limit +#' +#' @param resp A httr response +#' +#' @return logical +#' +pip_is_transient <- function(resp) { + if (httr2::resp_is_error(resp)) { + if (httr2::resp_status(resp) == 429) { + stringr::str_detect(httr2::resp_body_json(resp, check_type = FALSE)$message, + "Rate limit is exceeded") + } else { + FALSE + } + } else { + FALSE + } +} + +#' retry_after +#' +#' Helper function to determine how much time to wait before a new +#' query can be sent +#' +#' @param resp A httr response +#' +#' @return numeric +#' +retry_after <- function(resp) { + if (httr2::resp_is_error(resp)) { + time <- httr2::resp_body_json(resp, check_type = FALSE)$message + time <- stringr::str_remove(time, "Rate limit is exceeded. Try again in ") + readr::parse_number(time) + } else { + 0 + } +} + +#' parse_error_body +#' +#' Helper function to parse error messages generated by the PIP API +#' +#' @param resp A httr response +#' +#' @return character +#' +parse_error_body <- function(resp) { + if (httr2::resp_is_error(resp)) { + if (is_gateway_timeout(resp)) { + # Handle gateway timeout + return(httr2::resp_status_desc(resp)) + } else if (is_bad_gateway(resp)) { + # Handle bad gateway timeout + return(httr2::resp_status_desc(resp)) + } else { + # Handle regular PIP errors + out <- httr2::resp_body_json(resp) + message1 <- out$error[[1]] + message2 <- out$details[[1]]$msg[[1]] + message3 <- paste(unlist(out$details[[names(out$details)]]$valid), collapse = ", ") + message <- c(message1, message2, message3) + return(message) + } + } +} + +is_gateway_timeout <- function(resp) { + httr2::resp_status(resp) == 504 & + httr2::resp_status_desc(resp) == "Gateway Timeout" +} + +is_bad_gateway <- function(resp) { + httr2::resp_status(resp) == 502 & + httr2::resp_status_desc(resp) == "Bad Gateway" +} + +#' Deletes content of the cache folder +#' +#' +#' @return Side effect. Deletes files. +#' +#' @export +#' +#' @examples \dontrun{delete_cache()} +delete_cache <- function() { + + cached_files <- list.files(tools::R_user_dir("pipr", which = "cache"), + full.names = TRUE) + + if (length(cached_files) == 0) { + message("Cache is empty. Nothing to delete") + } else { + lapply(cached_files, file.remove) + message("All items have been deleted from the cache.") + } +} + +#' Provides some information about cached items +#' +#' +#' @return character. +#' +#' @export +#' +#' @examples +#' \dontrun{get_cache_info()} +get_cache_info <- function() { + + cache_path <- tools::R_user_dir("pipr", which = "cache") + n_cached <- length(list.files(cache_path)) + + if (n_cached > 1) { + message_text <- " API responses are currently cached in " + } else { + message_text <- " API response is currently cached in " + } + + message(cli::format_message(c("Cache status:", + "i" = paste0(n_cached, message_text, cache_path)))) +} + + +#' Change the list-output to dataframe (Function from pipapi) +#' +#' @param out output from wbpip::gd_compute_pip_stats +#' +#' @return dataframe +#' @export +change_grouped_stats_to_csv <- function(out) { + out[paste0("decile", seq_along(out$deciles))] <- out$deciles + out$deciles <- NULL + data.frame(out) } diff --git a/R/zzz.R b/R/zzz.R index f6a0d22..1387f4a 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,23 +1,23 @@ -.onLoad <- function(libname, pkgname) { - if (!Sys.getenv("PIPR_DISABLE_CACHING") == "TRUE") { - # d <- rappdirs::user_cache_dir("pipr") - # cm <- cachem::cache_disk(d, - # evict = "lru", - # max_size = 512 * 1024^2) - cm <- cachem::cache_mem(max_size = 512 * 1024^2, evict = "lru") - get_stats <<- memoise::memoise(get_stats, cache = cm) - get_wb <<- memoise::memoise(get_wb, cache = cm) - get_aux <<- memoise::memoise(get_aux, cache = cm) - get_versions <<- memoise::memoise(get_versions, cache = cm) - } - - options(cli.ignore_unknown_rstudio_theme = TRUE) - -} - -.onAttach <- function(libname, pkgname) { - if (!Sys.getenv("PIPR_DISABLE_CACHING") == "TRUE") { - packageStartupMessage("Info: Session based caching is enabled.") - } -} +# .onLoad <- function(libname, pkgname) { +# if (!Sys.getenv("PIPR_DISABLE_CACHING") == "TRUE") { +# # d <- rappdirs::user_cache_dir("pipr") +# # cm <- cachem::cache_disk(d, +# # evict = "lru", +# # max_size = 512 * 1024^2) +# cm <- cachem::cache_mem(max_size = 512 * 1024^2, evict = "lru") +# get_stats <<- memoise::memoise(get_stats, cache = cm) +# get_wb <<- memoise::memoise(get_wb, cache = cm) +# get_aux <<- memoise::memoise(get_aux, cache = cm) +# get_versions <<- memoise::memoise(get_versions, cache = cm) +# } +# +# options(cli.ignore_unknown_rstudio_theme = TRUE) +# +# } +# +# .onAttach <- function(libname, pkgname) { +# if (!Sys.getenv("PIPR_DISABLE_CACHING") == "TRUE") { +# packageStartupMessage("Info: Session based caching is enabled.") +# } +# } diff --git a/data-raw/datt.R b/data-raw/datt.R new file mode 100644 index 0000000..053a752 --- /dev/null +++ b/data-raw/datt.R @@ -0,0 +1,44 @@ +# Datt Data procedure ---- +# Data is from Sarveskhana, Vol IX n 4 (April 1986), same dataset used by Datt (1998). +# Function to calculate cumulative shares (p and L) +calculate_cumulative_shares <- function(data, area) { + # Step 1: Calculate cumulative population share (p) + data$p <- cumsum(data$percentage_of_persons) / sum(data$percentage_of_persons) + + # Step 2: Calculate class consumption + data$class_consumption <- data$mean_monthly_pc_exp * (data$percentage_of_persons / 100) + + # Step 3: Calculate cumulative welfare share (L) + total_consumption <- sum(data$class_consumption) + data$L <- cumsum(data$class_consumption) / total_consumption + + # Add area identifier + data$area <- area + + # Return the data frame with p, L, and area columns + return(data[, setdiff(names(data), "class_consumption")]) +} + +# Rural data setup +rural_data <- data.frame( + monthly_pc_exp = c("0 – 30", "30 – 40", "40 – 50", "50 – 60", "60 – 75", "75 – 85", "85 – 100", + "100 – 125", "125 – 150", "150 – 200", "200 – 250", "250 – 300", "300 & above"), + mean_monthly_pc_exp = c(24.4, 35.8, 45.36, 55.10, 64.92, 77.08, 91.75, 110.64, 134.9, 167.76, 215.48, 261.66, 384.97), + percentage_of_persons = c(0.92, 2.47, 5.11, 7.90, 9.69, 15.24, 13.64, 16.99, 10.00, 9.78, 3.96, 1.81, 2.49) +) + +# Urban data setup +urban_data <- data.frame( + monthly_pc_exp = c("0 – 30", "30 – 40", "40 – 50", "50 – 60", "60 – 75", "75 – 85", "85 – 100", + "100 – 125", "125 – 150", "150 – 200", "200 – 250", "250 – 300", "300 & above"), + mean_monthly_pc_exp = c(21.87, 35.8, 45.63, 55.46, 65.13, 77.2, 92.26, 111.41, 136.27, 170.13, 219.76, 267.33, 424.6), + percentage_of_persons = c(0.21, 0.40, 1.01, 1.40, 3.92, 9.21, 10.64, 13.13, 11.17, 8.75, 9.16, 5.92, 9.32) +) + +# Apply function to rural and urban data +datt_rural <- calculate_cumulative_shares(rural_data, "rural") +datt_urban <- calculate_cumulative_shares(urban_data, "urban") + + +# Usethis +usethis::use_data(datt_rural, datt_urban, overwrite = TRUE) diff --git a/data/datt_rural.rda b/data/datt_rural.rda new file mode 100644 index 0000000..918e14b Binary files /dev/null and b/data/datt_rural.rda differ diff --git a/data/datt_urban.rda b/data/datt_urban.rda new file mode 100644 index 0000000..2b48be9 Binary files /dev/null and b/data/datt_urban.rda differ diff --git a/man/build_request.Rd b/man/build_request.Rd new file mode 100644 index 0000000..5d6b647 --- /dev/null +++ b/man/build_request.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_request.R +\name{build_request} +\alias{build_request} +\title{Build request version 2} +\usage{ +build_request(server, api_version, endpoint, ...) +} +\arguments{ +\item{server}{character: Server. For WB internal use only} + +\item{api_version}{character: API version} + +\item{endpoint}{character: PIP API endpoint} + +\item{...}{other parameters} +} +\value{ +httr2 request +} +\description{ +Build request version 2 +} diff --git a/man/build_request_old.Rd b/man/build_request_old.Rd new file mode 100644 index 0000000..75d3f02 --- /dev/null +++ b/man/build_request_old.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_request.R +\name{build_request_old} +\alias{build_request_old} +\title{build_request, OLD version} +\usage{ +build_request_old(server, api_version, endpoint, ...) +} +\arguments{ +\item{server}{character: Server. For WB internal use only} + +\item{api_version}{character: API version} + +\item{endpoint}{character: PIP API endpoint} + +\item{...}{other parameters} +} +\value{ +httr2 request +} +\description{ +build_request, OLD version +} diff --git a/man/call_aux.Rd b/man/call_aux.Rd index 6487b0e..37bfbd4 100644 --- a/man/call_aux.Rd +++ b/man/call_aux.Rd @@ -19,11 +19,11 @@ call a table from .pip env \examples{ # call one table -get_aux("gdp", assign_tb = TRUE, force = TRUE) +get_aux("gdp", assign_tb = TRUE, replace = TRUE) # PR 63 call_aux("gdp") # see the name of several tables in memory tb <- c("cpi", "ppp", "pop") -lapply(tb, get_aux, assign_tb = TRUE, force = TRUE) +lapply(tb, get_aux, assign_tb = TRUE, replace = TRUE) # PR 63 call_aux() } diff --git a/man/change_grouped_stats_to_csv.Rd b/man/change_grouped_stats_to_csv.Rd new file mode 100644 index 0000000..353b439 --- /dev/null +++ b/man/change_grouped_stats_to_csv.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{change_grouped_stats_to_csv} +\alias{change_grouped_stats_to_csv} +\title{Change the list-output to dataframe (Function from pipapi)} +\usage{ +change_grouped_stats_to_csv(out) +} +\arguments{ +\item{out}{output from wbpip::gd_compute_pip_stats} +} +\value{ +dataframe +} +\description{ +Change the list-output to dataframe (Function from pipapi) +} diff --git a/man/datt_rural.Rd b/man/datt_rural.Rd new file mode 100644 index 0000000..36fc749 --- /dev/null +++ b/man/datt_rural.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{datt_rural} +\alias{datt_rural} +\title{Datt (1998) grouped data for rural india, 1983} +\format{ +A data frame with 13 observations on the following 6 variables: +\describe{ +\item{monthly_pc_exp}{Welfare range class} +\item{mean_monthly_pc_exp}{Mean welfare for given welfare range class} +\item{percentage_of_persons}{Percentage of individuals in given welfare class} +\item{L}{Cumulative welfare} +\item{p}{Cumulative population} +\item{area}{rural} +} + +@source Datt, G. (1998). See get_cp vignette. +} +\usage{ +datt_rural +} +\description{ +Dataset from Datt (1998) with grouped data for rural India in 1983. +} +\keyword{datasets} diff --git a/man/datt_urban.Rd b/man/datt_urban.Rd new file mode 100644 index 0000000..a089213 --- /dev/null +++ b/man/datt_urban.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{datt_urban} +\alias{datt_urban} +\title{Grouped data for urban india, 1983} +\format{ +A data frame with 13 observations on the following 6 variables: +\describe{ +\item{monthly_pc_exp}{Welfare range class} +\item{mean_monthly_pc_exp}{Mean welfare for given welfare range class} +\item{percentage_of_persons}{Percentage of individuals in given welfare class} +\item{L}{Cumulative welfare} +\item{p}{Cumulative population} +\item{area}{urban} +} + +@source Sarvekshana N26 Vol 9 N 4, and Datt, G. (1998) for methodology. See get_cp vignette. +} +\usage{ +datt_urban +} +\description{ +Dataset from Sarvekshana N26 Vol 9 N 4, created by the author following +Datt(1998) methodology with grouped data for urban India in 1983. +} +\keyword{datasets} diff --git a/man/delete_cache.Rd b/man/delete_cache.Rd new file mode 100644 index 0000000..7f004bd --- /dev/null +++ b/man/delete_cache.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{delete_cache} +\alias{delete_cache} +\title{Deletes content of the cache folder} +\usage{ +delete_cache() +} +\value{ +Side effect. Deletes files. +} +\description{ +Deletes content of the cache folder +} +\examples{ +\dontrun{delete_cache()} +} diff --git a/man/get_aux.Rd b/man/get_aux.Rd index 60209ed..76d78f1 100644 --- a/man/get_aux.Rd +++ b/man/get_aux.Rd @@ -27,7 +27,7 @@ get_aux( simplify = TRUE, server = NULL, assign_tb = FALSE, - force = FALSE + replace = FALSE ) get_countries( @@ -170,7 +170,8 @@ If \code{FALSE} no assignment will performed. If \code{TRUE}, the table will be assigned to exactly the same name as the one of the desired table. If character, the table will be assigned to that name.} -\item{force}{logical: force replacement. Default is FALSE} +\item{replace}{logical: force replacement of aux files in \code{.pip} env. Default +is FALSE.} } \value{ If \code{simplify = FALSE}, it returns a list of class "pip_api". If diff --git a/man/get_cache_info.Rd b/man/get_cache_info.Rd new file mode 100644 index 0000000..e09c361 --- /dev/null +++ b/man/get_cache_info.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_cache_info} +\alias{get_cache_info} +\title{Provides some information about cached items} +\usage{ +get_cache_info() +} +\value{ +character. +} +\description{ +Provides some information about cached items +} +\examples{ +\dontrun{get_cache_info()} +} diff --git a/man/get_cp.Rd b/man/get_cp.Rd new file mode 100644 index 0000000..aff98aa --- /dev/null +++ b/man/get_cp.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_cp.R +\name{get_cp} +\alias{get_cp} +\title{Get Country Profiles} +\usage{ +get_cp( + country = "all", + povline = 2.15, + version = NULL, + ppp_version = 2017, + release_version = NULL, + api_version = "v1", + format = c("arrow", "rds", "json", "csv"), + simplify = TRUE, + server = NULL +) +} +\arguments{ +\item{country}{character: A vector with one or more \href{https://wits.worldbank.org/wits/wits/witshelp/content/codes/country_codes.htm}{country ISO 3 codes} or +'all'} + +\item{povline}{numeric: Poverty line} + +\item{version}{character: Data version. See \code{get_versions()}} + +\item{ppp_version}{ppp year to be used} + +\item{release_version}{date when the data was published in YYYYMMDD format} + +\item{api_version}{character: API version} + +\item{format}{character: Response format either of c("rds", "json", "csv")} + +\item{simplify}{logical: If TRUE (the default) the response is returned as a +\code{tibble}} + +\item{server}{character: Server. For WB internal use only} +} +\value{ +If \code{simplify = FALSE}, it returns a list of class "pip_api". If +\code{simplify = TRUE}, it returns a tibble with the requested data. This is the +default. Only for \code{get_aux()}, If \code{assign_tb = TRUE} or character, it +returns TRUE when data was assign properly to .pip env. FALSE, if it was +not assigned. +} +\description{ +Get Country Profiles +} +\examples{ +\dontrun{ +# One country, all years with default ppp_version = 2017 +res <- get_cp(country = "AGO") + +# All countries, povline = 1.9 +res <- get_cp(povline = 1.9) + +# All countries and years with default values +res <- get_cp() +} +} diff --git a/man/get_cp_ki.Rd b/man/get_cp_ki.Rd new file mode 100644 index 0000000..0847ff5 --- /dev/null +++ b/man/get_cp_ki.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_cp_ki.R +\name{get_cp_ki} +\alias{get_cp_ki} +\title{Get Country Profiles Key Indicators} +\usage{ +get_cp_ki( + country = NULL, + povline = 2.15, + version = NULL, + ppp_version = 2017, + release_version = NULL, + api_version = "v1", + format = c("arrow", "rds", "json", "csv"), + simplify = TRUE, + server = NULL +) +} +\arguments{ +\item{country}{character: A vector with one or more \href{https://wits.worldbank.org/wits/wits/witshelp/content/codes/country_codes.htm}{country ISO 3 codes} or +'all'} + +\item{povline}{numeric: Poverty line} + +\item{version}{character: Data version. See \code{get_versions()}} + +\item{ppp_version}{ppp year to be used} + +\item{release_version}{date when the data was published in YYYYMMDD format} + +\item{api_version}{character: API version} + +\item{format}{character: Response format either of c("rds", "json", "csv")} + +\item{simplify}{logical: If TRUE (the default) the response is returned as a +\code{tibble}} + +\item{server}{character: Server. For WB internal use only} +} +\value{ +If \code{simplify = FALSE}, it returns a list of class "pip_api". If +\code{simplify = TRUE}, it returns a tibble with the requested data. This is the +default. Only for \code{get_aux()}, If \code{assign_tb = TRUE} or character, it +returns TRUE when data was assign properly to .pip env. FALSE, if it was +not assigned. +} +\description{ +Get Country Profiles Key Indicators +} +\examples{ +\dontrun{ +# One country, all years with default ppp_version = 2017 +res <- get_cp(country = "IDN") + +# All countries, povline = 1.9 +res <- get_cp(country = "IDN", povline = 1.9) + +} +} diff --git a/man/get_gd.Rd b/man/get_gd.Rd new file mode 100644 index 0000000..07fd295 --- /dev/null +++ b/man/get_gd.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_gd.R +\name{get_gd} +\alias{get_gd} +\title{Get grouped stats} +\usage{ +get_gd( + cum_welfare = NULL, + cum_population = NULL, + estimate = c("stats", "lorenz", "params"), + requested_mean = NULL, + povline = NULL, + popshare = NULL, + lorenz = NULL, + n_bins = NULL, + api_version = "v1", + format = c("rds", "json", "csv"), + simplify = TRUE, + server = NULL +) +} +\arguments{ +\item{cum_welfare}{numeric: Cumulative welfare values, expressed in shares. Any length. They should be monotonically increasing, and sum to 1.} + +\item{cum_population}{numeric: Cumulative population values, expressed in shares. Any length. They should be monotonically increasing, and sum to 1.} + +\item{estimate}{character: One of "stats", "lorenz", "params".} + +\item{requested_mean}{numeric: Requested mean.} + +\item{povline}{numeric: Poverty line. Required for estimate = "stats".} + +\item{popshare}{numeric: Proportion of the population living below the +poverty line} + +\item{lorenz}{character: Lorenz curve methodology. Either "lb" or "lq".} + +\item{n_bins}{numeric: Number of bins. Required for estimate = "lorenz".} + +\item{api_version}{character: API version} + +\item{format}{character: Response format either of c("rds", "json", "csv")} + +\item{simplify}{logical: If TRUE (the default) the response is returned as a +\code{tibble}} + +\item{server}{character: Server. For WB internal use only} +} +\description{ +Get grouped stats from the PIP API. +} +\examples{ +\dontrun{ + +datt_data <- data.frame(p = c(0.0092, 0.0339, 0.0850, 0.160, 0.2609, 0.4133, + 0.5497, 0.7196, 0.8196, 0.9174, 0.9570, 0.9751, + 1), + L = c(0.00208, 0.001013, 0.03122, 0.07083, 0.12808, + 0.23498, 0.34887, 051994, 0.64270, 0.79201, + 0.86966, 0.91277, 1) + +# estimate = 'stats': retrieve poverty statistics. +stats <- get_gd(cum_welfare = datt_data$L, cum_population = datt_data$p, + estimate = "stats", + requested_mean = 19, # default is 1. + povline = 2.15) # default is 1. + +# estimate = 'lorenz': retrieve Lorenz curve data points for a specified number of bins. + +## Best lorenz curve methodolody selected by default: +lorenz <- get_gd(cum_welfare = datt_data$L, + cum_population = datt_data$p, + estimate = "lorenz", + n_bins = 100) # must be specified, default is NULL. + +## Specify lorenz curve methodology: +### Beta Lorenz ("lb") +lorenz_lb <- get_gd(cum_welfare = datt_data$L, + cum_population = datt_data$p, + estimate = "lorenz", + lorenz = "lb", + n_bins = 100) + +### Quadratic Lorenz ("lq") +lorenz_lq <- get_gd(cum_welfare = datt_data$L, + cum_population = datt_data$p, + estimate = "lorenz", + lorenz = "lq", + n_bins = 100) + +# estimate = 'params': retrieve regression parameters used for the lorenz curve estimation. +params <- get_gd(cum_welfare = datt_data$L, + cum_population = datt_data$p, + estimate = "params") +} +} diff --git a/man/get_stats.Rd b/man/get_stats.Rd index 921f9f9..daf1df2 100644 --- a/man/get_stats.Rd +++ b/man/get_stats.Rd @@ -11,6 +11,7 @@ get_stats( povline = NULL, popshare = NULL, fill_gaps = FALSE, + nowcast = FALSE, subgroup = NULL, welfare_type = c("all", "income", "consumption"), reporting_level = c("all", "national", "urban", "rural"), @@ -18,7 +19,7 @@ get_stats( ppp_version = NULL, release_version = NULL, api_version = "v1", - format = c("rds", "json", "csv"), + format = c("arrow", "rds", "json", "csv"), simplify = TRUE, server = NULL ) @@ -49,6 +50,8 @@ poverty line} \item{fill_gaps}{logical: If TRUE, will interpolate / extrapolate values for missing years} +\item{nowcast}{logical: If TRUE, will return nowcast estimates.} + \item{subgroup}{character: If used result will be aggregated for predefined sub-groups. Either 'wb_regions' or 'none'.} diff --git a/man/parse_error_body.Rd b/man/parse_error_body.Rd new file mode 100644 index 0000000..bf50e2d --- /dev/null +++ b/man/parse_error_body.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{parse_error_body} +\alias{parse_error_body} +\title{parse_error_body} +\usage{ +parse_error_body(resp) +} +\arguments{ +\item{resp}{A httr response} +} +\value{ +character +} +\description{ +Helper function to parse error messages generated by the PIP API +} diff --git a/man/pip_is_transient.Rd b/man/pip_is_transient.Rd new file mode 100644 index 0000000..738bcd1 --- /dev/null +++ b/man/pip_is_transient.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{pip_is_transient} +\alias{pip_is_transient} +\title{pip_is_transient} +\usage{ +pip_is_transient(resp) +} +\arguments{ +\item{resp}{A httr response} +} +\value{ +logical +} +\description{ +Helper function to determine if an error is due to the number of requests +going over the rate limit +} diff --git a/man/rename_cols.Rd b/man/rename_cols.Rd new file mode 100644 index 0000000..9443ad3 --- /dev/null +++ b/man/rename_cols.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{rename_cols} +\alias{rename_cols} +\title{rename columns in dataframe} +\usage{ +rename_cols(df, oldnames, newnames) +} +\arguments{ +\item{df}{data frame} + +\item{oldnames}{character: old names} + +\item{newnames}{character: new names} +} +\value{ +data frame with new names +} +\description{ +rename columns in dataframe +} +\keyword{internal} diff --git a/man/retry_after.Rd b/man/retry_after.Rd new file mode 100644 index 0000000..805973f --- /dev/null +++ b/man/retry_after.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{retry_after} +\alias{retry_after} +\title{retry_after} +\usage{ +retry_after(resp) +} +\arguments{ +\item{resp}{A httr response} +} +\value{ +numeric +} +\description{ +Helper function to determine how much time to wait before a new +query can be sent +} diff --git a/man/set_aux.Rd b/man/set_aux.Rd index 2d097fe..9d09400 100644 --- a/man/set_aux.Rd +++ b/man/set_aux.Rd @@ -4,14 +4,14 @@ \alias{set_aux} \title{Set auxiliary table in .pip environment for later call} \usage{ -set_aux(table, value, force = FALSE) +set_aux(table, value, replace = FALSE) } \arguments{ \item{table}{character: name of the table in .pip env} \item{value}{data to be saved} -\item{force}{logical: force replacement. Default is FALSE} +\item{replace}{logical.} } \value{ Invisible TRUE if set correctly. FALSE otherwise diff --git a/man/unnest_ki.Rd b/man/unnest_ki.Rd new file mode 100644 index 0000000..3f2174d --- /dev/null +++ b/man/unnest_ki.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_cp_ki.R +\name{unnest_ki} +\alias{unnest_ki} +\title{Unnest the key indicators} +\usage{ +unnest_ki(out) +} +\arguments{ +\item{out}{parsed and simplified output from cp-key-indicators endpoint} +} +\value{ +data frame, unnested. +} +\description{ +Unnest the key indicators +} +\section{Functions}{ +\itemize{ +\item \code{unnest_ki()}: takes the simplified output from cp-key-indicators endpoint and unnests it. + +}} diff --git a/tests/testthat/test-caching.R b/tests/testthat/test-caching.R index ca55a93..0275c0b 100644 --- a/tests/testthat/test-caching.R +++ b/tests/testthat/test-caching.R @@ -1,33 +1,33 @@ -library(callr) - -test_that("Caching is enabled by default", { - skip_on_cran() - # Setup external R session - r <- callr::r_session$new(options = callr::r_session_options(user_profile = FALSE)) - r$run(function() Sys.setenv("PIPR_DISABLE_CACHING" = "FALSE")) - r$run(function() library(pipr)) - # Check that main functions are cached - tmp <- r$run(function() memoise::is.memoised(get_stats)) - expect_true(tmp) - tmp <- r$run(function() memoise::is.memoised(get_wb)) - expect_true(tmp) - tmp <- r$run(function() memoise::is.memoised(get_aux)) - expect_true(tmp) - r$kill() -}) - -test_that("Caching can be disabled", { - skip_on_cran() - # Setup external R session - r <- callr::r_session$new(options = callr::r_session_options(user_profile = FALSE)) - r$run(function() Sys.setenv("PIPR_DISABLE_CACHING" = "TRUE")) - r$run(function() library(pipr)) - # Check that main functions are NOT cached - tmp <- r$run(function() memoise::is.memoised(get_stats)) - expect_false(tmp) - tmp <- r$run(function() memoise::is.memoised(get_wb)) - expect_false(tmp) - tmp <- r$run(function() memoise::is.memoised(get_aux)) - expect_false(tmp) - r$kill() -}) +# library(callr) +# +# test_that("Caching is enabled by default", { +# skip_on_cran() +# # Setup external R session +# r <- callr::r_session$new(options = callr::r_session_options(user_profile = FALSE)) +# r$run(function() Sys.setenv("PIPR_DISABLE_CACHING" = "FALSE")) +# r$run(function() library(pipr)) +# # Check that main functions are cached +# tmp <- r$run(function() memoise::is.memoised(get_stats)) +# expect_true(tmp) +# tmp <- r$run(function() memoise::is.memoised(get_wb)) +# expect_true(tmp) +# tmp <- r$run(function() memoise::is.memoised(get_aux)) +# expect_true(tmp) +# r$kill() +# }) +# +# test_that("Caching can be disabled", { +# skip_on_cran() +# # Setup external R session +# r <- callr::r_session$new(options = callr::r_session_options(user_profile = FALSE)) +# r$run(function() Sys.setenv("PIPR_DISABLE_CACHING" = "TRUE")) +# r$run(function() library(pipr)) +# # Check that main functions are NOT cached +# tmp <- r$run(function() memoise::is.memoised(get_stats)) +# expect_false(tmp) +# tmp <- r$run(function() memoise::is.memoised(get_wb)) +# expect_false(tmp) +# tmp <- r$run(function() memoise::is.memoised(get_aux)) +# expect_false(tmp) +# r$kill() +# }) diff --git a/tests/testthat/test-display_aux.R b/tests/testthat/test-display_aux.R index c6e6fb8..c136947 100644 --- a/tests/testthat/test-display_aux.R +++ b/tests/testthat/test-display_aux.R @@ -4,8 +4,10 @@ test_that("returns proper table", { api_version <- "v1" simplify <- TRUE server <- NULL - u <- build_url(server, "aux", api_version = api_version) - res <- httr::GET(u) + req <- build_request(server = server, + api_version = api_version, + endpoint = "aux") + res <- httr2::req_perform(req) tbs_tb <- parse_response(res, simplify = simplify) tt <- suppressMessages(display_aux(server = server, simplify = simplify, api_version = api_version)) diff --git a/tests/testthat/test-get_aux.R b/tests/testthat/test-get_aux.R index eee97b0..7b54e58 100644 --- a/tests/testthat/test-get_aux.R +++ b/tests/testthat/test-get_aux.R @@ -38,30 +38,15 @@ test_that("get_aux() works when calling specific tables", { expect_true(tibble::is_tibble(res)) # Check failure if table doesn't exist - # TO DO: Use prod server for this test when API has been released - # expect_error(get_aux("tmp")) - # expect_true(is.list(get_aux("tmp", simplify = FALSE))) skip_if(Sys.getenv("PIPR_RUN_LOCAL_TESTS") != "TRUE") expect_error(get_aux("wrong-table-name", server = "qa")) - expect_true(is.list(get_aux("wrong-table-name", simplify = FALSE, server = "prod"))) # Check all tables - skip("survey_metadata gives a 500 error. Need to add functionality for list data") - dl <- lapply(res, function(x) try(get_aux(x))) + # skip("survey_metadata gives a 500 error. Need to add functionality for list data") + res <- get_aux() + dl <- lapply(res$tables, function(x) try(get_aux(x))) expect_true(all(sapply(dl, tibble::is_tibble))) - expect_true(sapply(dl, function(x) any(class(x) != "try-error"))) - # expect_false(sapply(dl, function(x) any(names(x) == "error"))) -}) - -test_that("User agent works", { - skip_if_offline() - skip_on_cran() - # res <- get_aux(simplify = FALSE) - # tmp <- res$response$request$options$useragent - # expect_identical(tmp, pipr_user_agent) - res <- get_aux("gdp", simplify = FALSE) - tmp <- res$response$request$options$useragent - expect_identical(tmp, pipr_user_agent) + expect_true(all(sapply(dl, function(x) any(class(x) != "try-error")))) }) ## Test helper functions ---- @@ -82,84 +67,3 @@ test_that("get_regions() works", { expect_true(tibble::is_tibble(res)) expect_identical(res, res2) }) - - -# test_that("get_countries() with mocking works", { -# mockery::stub(get_aux, "httr::GET", function(...) { -# readRDS(test_path("testdata", "response-country.RDS")) -# }) -# -# mockery::stub(get_countries, "get_aux", function(...) { -# readRDS(test_path("testdata", "response-country.RDS")) -# }) -# -# res1 <- get_aux("countries") -# res2 <- parse_response(get_countries(), TRUE) -# -# expect_true(tibble::is_tibble(res1)) -# expect_true(tibble::is_tibble(res2)) -# -# expect_equal(dim(res1), dim(res2)) -# expect_identical(res1, res2) -# }) -# -# -# test_that("get_regions() with mocking works", { -# mockery::stub(get_aux, "httr::GET", function(...) { -# readRDS(test_path("testdata", "response-regions.RDS")) -# }) -# -# mockery::stub(get_regions, "get_aux", function(...) { -# readRDS(test_path("testdata", "response-regions.RDS")) -# }) -# -# res1 <- get_aux("regions") -# res2 <- parse_response(get_regions(), TRUE) -# -# expect_true(tibble::is_tibble(res1)) -# expect_true(tibble::is_tibble(res2)) -# -# expect_equal(dim(res1), dim(res2)) -# expect_identical(res1, res2) -# }) -# -# -# test_that("get_cpi() with mocking works", { -# mockery::stub(get_aux, "httr::GET", function(...) { -# readRDS(test_path("testdata", "response-cpi.RDS")) -# }) -# -# mockery::stub(get_cpi, "get_aux", function(...) { -# readRDS(test_path("testdata", "response-cpi.RDS")) -# }) -# -# res1 <- get_aux("cpi") -# res2 <- parse_response(get_cpi(), TRUE) -# -# expect_true(tibble::is_tibble(res1)) -# expect_true(tibble::is_tibble(res2)) -# -# expect_equal(dim(res1), dim(res2)) -# expect_identical(res1, res2) -# }) -# -# test_that("get_dictionary() with mocking works", { -# #Waiting for this PR to be merged https://github.com/worldbank/pipr/pull/43 -# #so that get_dictionary() works -# mockery::stub(get_aux, "httr::GET", function(...) { -# readRDS(test_path("testdata", "response-dictionary.RDS")) -# }) -# -# mockery::stub(get_dictionary, "get_aux", function(...) { -# readRDS(test_path("testdata", "response-dictionary.RDS")) -# }) -# -# res1 <- get_aux("dictionary") -# res2 <- parse_response(get_dictionary(), TRUE) -# -# expect_true(tibble::is_tibble(res1)) -# expect_true(tibble::is_tibble(res2)) -# -# expect_equal(dim(res1), dim(res2)) -# expect_identical(res1, res2) -# }) diff --git a/tests/testthat/test-get_cp.R b/tests/testthat/test-get_cp.R new file mode 100644 index 0000000..e7e627d --- /dev/null +++ b/tests/testthat/test-get_cp.R @@ -0,0 +1,55 @@ +# 1. General Argument Matching Tests ---- +test_that("Argument matching works correctly for get_cp()", { + skip_if_offline() + skip_on_cran() + # Default arguments, one country, simplify = TRUE + res <- get_cp(country = "AGO") + expect_type(res, "list") # Default simplify = TRUE returns a data.frame + + # Default arguments, one country, simplify = FALSE + res <- get_cp(country = "AGO", simplify = FALSE) + expect_s3_class(res, "pip_api") + + # Argument matching for 'format' + expect_error(get_cp(format = "txt"), "'arg' should be one of") + + # Argument matching for 'api_version' + expect_error(get_cp(api_version = "v2"), "'arg' should be") + +}) + +# 2. povline Set-up Tests ---- +test_that("povline and ppp_version arguments work correctly", { + skip_if_offline() + skip_on_cran() + # Default povline + res <- get_cp(country = "AGO") + expect_true(any(res$poverty_line == 2.15)) + + res <- get_cp(country = "AGO", ppp_version = 2017) + expect_true(any(res$poverty_line == 2.15)) + + # povline with ppp_version 2011 + res <- get_cp(country = "AGO", ppp_version = 2011, povline = NULL) + expect_true(any(res$poverty_line == 1.9)) + + # povline when povline is provided + res <- get_cp(country = "AGO", povline = 3.2) + expect_true(any(res$poverty_line == 3.2)) +}) + + +# 3. Other Tests ---- +test_that("Requests execute successfully for get_cp()", { + skip_if_offline() + skip_on_cran() + + # Check that the response for invalid country throws an error + expect_error(get_cp(country = "INVALID"), "404") + + # All countries with a povline + res <- get_cp(country = "all", povline = 2.15) + expect_true(is.data.frame(res) || inherits(res, "pip_api")) +}) + + diff --git a/tests/testthat/test-get_cp_ki.R b/tests/testthat/test-get_cp_ki.R new file mode 100644 index 0000000..16db725 --- /dev/null +++ b/tests/testthat/test-get_cp_ki.R @@ -0,0 +1,97 @@ + +# 1. General Argument Matching Tests ---- +test_that("Argument matching works correctly for get_cp_ki()", { + skip_if_offline() + skip_on_cran() + + # Default arguments, one country, simplify = TRUE + res <- get_cp_ki(country = "IDN") + expect_type(res, "list") + + # Default arguments, one country, simplify = FALSE + res <- get_cp_ki(country = "IDN", simplify = FALSE) + expect_s3_class(res, "pip_api") + + # Argument matching for 'format' + expect_error(get_cp_ki(format = "txt"), "'arg' should be one of") + + # Argument matching for 'api_version' + expect_error(get_cp_ki(api_version = "v2"), "'arg' should be") +}) + +# 2. povline Set-up Tests ---- +test_that("povline and ppp_version arguments work correctly for get_cp_ki()", { + skip_if_offline() + skip_on_cran() + + # Default povline + res <- get_cp_ki(country = "IDN") + expect_true(any(res$poverty_line == 2.15)) + + res <- get_cp_ki(country = "IDN", ppp_version = 2017) + expect_true(any(res$poverty_line == 2.15)) + + # povline with ppp_version 2011 + res <- get_cp_ki(country = "IDN", ppp_version = 2011, povline = NULL) + expect_true(any(res$poverty_line == 1.9)) + + # povline when povline is provided + res <- get_cp_ki(country = "IDN", povline = 3.2) + expect_true(any(res$poverty_line == 3.2)) +}) + +# 3. Country Argument Tests ---- +test_that("Country argument validation works correctly in get_cp_ki()", { + skip_if_offline() + skip_on_cran() + + # Valid country + res <- get_cp_ki(country = "IDN") + expect_type(res, "list") + + # Missing country argument + expect_error(get_cp_ki(country = NULL), "Please provide a country code.") + + # More than one country + expect_error(get_cp_ki(country = c("IDN", "AGO")), "Please provide only one country code.") +}) + + + +# 4. Request Execution Tests ---- +test_that("Requests threws error for get_cp_ki()", { + skip_if_offline() + skip_on_cran() + + # Check that the response for invalid country throws an error + expect_error(get_cp_ki(country = "INVALID"), "404") +}) + +# 6. Response Parsing and Unnesting Tests ---- +test_that("Response parsing and unnesting work correctly for get_cp_ki()", { + skip_if_offline() + skip_on_cran() + + # Build unnested dataset manually + res_manual <- data.frame( + country_code = "IDN", + reporting_year = 2023, + poverty_line = 2.15, + headcount = 0.0182, + headcount_national = 9.4, + mpm_headcount = 0.0214, + reporting_pop = 277.5341, + gni = 4870, + latest_gni = TRUE, + gdp_growth = 5.0481, + latest_gdp = TRUE, + year_range = "2018-2023", + share_below_40 = 2.7768, + share_total = 1.8967, + stringsAsFactors = FALSE + ) + + res <- get_cp_ki(country = "IDN", simplify = TRUE) + expect_equal(res, res_manual) +}) + diff --git a/tests/testthat/test-get_gd.R b/tests/testthat/test-get_gd.R new file mode 100644 index 0000000..4993a08 --- /dev/null +++ b/tests/testthat/test-get_gd.R @@ -0,0 +1,114 @@ +# Section 1: General Argument Checks ---- +test_that("General Argument Checks", { + skip_if_offline() + skip_on_cran() + + # Check if cum_welfare and cum_population have different lengths + cum_welfare <- c(0.1, 0.2, 0.3) + cum_population <- c(0.1, 0.2) + + expect_error( + get_gd(cum_welfare = cum_welfare, cum_population = cum_population, estimate = "stats"), + "must have the same length" + ) +}) + +## Section 2: Testing 'stats' Endpoint ----- +test_that("'stats' Endpoint Tests", { + skip_if_offline() + skip_on_cran() + # Error when requested_mean or povline is missing + cum_welfare <- datt_rural$L + cum_population <- datt_rural$p + + # Missing requested_mean + expect_error( + get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "stats", povline = 89), + "must be provided" + ) + + # Missing povline + expect_error( + get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "stats", requested_mean = 109.9), + "you must provide" + ) + + # Correct retrieval of statistics + stats <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "stats", requested_mean = 109.9, povline = 89) + + expect_s3_class(stats, "data.frame") # The returned object should be a data frame + #expect_true(all(c("poverty_line", "mean", "headcount") %in% names(stats))) + + # popshare + stats <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "stats", requested_mean = 109.9, popshare = 0.5, + povline = 89) + + expect_true(stats$headcount == 0.5) +}) + +### Section 3: Testing 'lorenz' Endpoint ---- +test_that("'lorenz' Endpoint Tests", { + skip_if_offline() + skip_on_cran() + + cum_welfare <- datt_rural$L + cum_population <- datt_rural$p + + # Correct retrieval of Lorenz curve data points + lorenz <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "lorenz", n_bins = 10) + + expect_s3_class(lorenz, "data.frame") + expect_true(all(c("weight", "welfare") %in% names(lorenz))) + expect_equal(nrow(lorenz), 10) + + # Handling of lorenz methodology + lorenz_lb <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "lorenz", lorenz = "lb", n_bins = 100) + + expect_s3_class(lorenz_lb, "data.frame") + expect_true(all(c("weight", "welfare") %in% names(lorenz_lb))) + + lorenz_lq <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, + estimate = "lorenz", lorenz = "lq", n_bins = 100) + + expect_s3_class(lorenz_lq, "data.frame") + expect_true(all(c("weight", "welfare") %in% names(lorenz_lq))) +}) + +### Section 4: Testing 'params' Endpoint ----- +test_that("'params' Endpoint Tests", { + skip_if_offline() + skip_on_cran() + + cum_welfare <- datt_rural$L + cum_population <- datt_rural$p + + # Proper retrieval of regression parameters + params <- get_gd(cum_welfare = cum_welfare, cum_population = cum_population, estimate = "params") + + expect_s3_class(params, "data.frame") + expect_true(all(c("A", "B") %in% names(params))) # Check some example parameter names +}) + +### Section 5: Additional ----- +test_that("Edge Cases and Error Handling", { + skip_if_offline() + skip_on_cran() + + # Both cum_welfare and cum_population missing + expect_error( + get_gd(estimate = "stats", requested_mean = 19, povline = 2.15) + ) + + # Invalid lorenz method + expect_error( + get_gd(cum_welfare = datt_rural$L, cum_population = datt_rural$p, + estimate = "lorenz", lorenz = "invalid_method", n_bins = 10), + "You supplied an invalid value for lorenz" + ) +}) diff --git a/tests/testthat/test-get_stats.R b/tests/testthat/test-get_stats.R index 2ee287f..466aa68 100644 --- a/tests/testthat/test-get_stats.R +++ b/tests/testthat/test-get_stats.R @@ -121,9 +121,11 @@ test_that("get_stats() returns a tibble with named columns for empty response (f expect_identical(names(res), names(res2)) # json (does not return an empty response data frame) - res2 <- get_stats("AGO", 2005, format = "json") # empty response - expect_equal(dim(res2)[2], 0) - expect_equal(length(names(res2)), 0) + # GC: this test returns a warning now because json returns a completely + # empty tibble (no variables), is this correct? + # res2 <- get_stats("AGO", 2005, format = "json") # empty response + # expect_equal(dim(res2)[2], 0) + # expect_equal(length(names(res2)), 0) }) @@ -187,14 +189,10 @@ test_that("get_wb() works w/ simplify = FALSE", { }) -test_that("User agent works", { +test_that("get_stats() works with nowcast == TRUE",{ skip_if_offline() skip_on_cran() - res <- get_stats("AGO", 2000, simplify = FALSE) - tmp <- res$response$request$options$useragent - expect_identical(tmp, pipr_user_agent) - res <- get_wb(2000, simplify = FALSE) - tmp <- res$response$request$options$useragent - expect_identical(tmp, pipr_user_agent) + nowcast_output <- get_stats("AGO", nowcast = TRUE) + expect_true("nowcast" %in% nowcast_output$estimate_type) }) diff --git a/tests/testthat/test-other.R b/tests/testthat/test-other.R index 98894eb..a55bc90 100644 --- a/tests/testthat/test-other.R +++ b/tests/testthat/test-other.R @@ -5,15 +5,15 @@ test_that("health_check() works", { skip_if_offline() skip_on_cran() res <- health_check(api_version = "v1") - expect_identical(httr::content(res)[[1]], "PIP API is running") + expect_identical(httr2::resp_body_json(res)[[1]], "PIP API is running") expect_equal(res$status_code, 200) expect_invisible(health_check(api_version = "v1")) expect_error(health_check("xx")) skip_if(Sys.getenv("PIPR_RUN_LOCAL_TESTS") != "TRUE") skip_if(is.null(curl::nslookup(dev_host, error = FALSE)), message = "Could not connect to DEV host") - expect_identical(httr::content(health_check(api_version = "v1", server = "dev"))[[1]], "PIP API is running") + expect_identical(httr2::resp_body_json(health_check(api_version = "v1", server = "dev"))[[1]], "PIP API is running") skip_if(is.null(curl::nslookup(qa_host, error = FALSE)), message = "Could not connect to QA host") - expect_identical(httr::content(health_check(api_version = "v1", server = "qa"))[[1]], "PIP API is running") + expect_identical(httr2::resp_body_json(health_check(api_version = "v1", server = "qa"))[[1]], "PIP API is running") }) test_that("get_versions() works", { diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 8ff7f74..b38e633 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -17,7 +17,7 @@ test_that("check_internet() works", { test_that("check_api() works", { skip_if_offline() skip_on_cran() - res <- check_api("v1", server = NULL) + res <- check_api("v1") expect_equal(res, "PIP API is running") }) @@ -26,68 +26,63 @@ test_that("check_status() works", { skip_on_cran() # 200 res <- health_check("v1") - parsed <- parse_response(res, simplify = FALSE)$content - expect_true(check_status(res, parsed)) + expect_true(check_status(res)) # 404 res <- res_ex_404 - parsed <- parse_response(res, simplify = FALSE)$content - expect_error(check_status(res, parsed)) + expect_error(check_status(res)) # 500 res <- res_ex_404 - parsed <- parse_response(res, simplify = FALSE)$content res$status_code <- 500 - parsed$error <- NULL - parsed$details <- NULL - expect_error(check_status(res, parsed)) + expect_error(check_status(res)) }) -test_that("build_url() works", { +test_that("build_base_url() works", { # Check that url is correctly pasted together - x <- build_url(server = NULL, endpoint = "pip", api_version = "v1") + x <- build_base_url(server = NULL, endpoint = "pip", api_version = "v1") expect_identical(x, paste0(prod_url, "/v1/pip")) - x <- build_url("prod", "pip", api_version = "v1") + x <- build_base_url("prod", "pip", api_version = "v1") expect_identical(x, paste0(prod_url, "/v1/pip")) - x <- build_url("prod", "pip-grp", api_version = "v2") + x <- build_base_url("prod", "pip-grp", api_version = "v2") expect_identical(x, paste0(prod_url, "/v2/pip-grp")) # Expect error if server arg is incorrect - expect_error(build_url("tmp", "pip", "v1")) + expect_error(build_base_url("tmp", "pip", "v1")) # Check internal URLs skip_if(Sys.getenv("PIPR_RUN_LOCAL_TESTS") != "TRUE") - x <- build_url("qa", "pip", "v1") + x <- build_base_url("qa", "pip", "v1") expect_identical(x, paste0(Sys.getenv("PIP_QA_URL"), "/v1/pip")) - x <- build_url("dev", "pip", "v1") + x <- build_base_url("dev", "pip", "v1") expect_identical(x, paste0(Sys.getenv("PIP_DEV_URL"), "/v1/pip")) # Expect error if ENV vars are not found skip_if(Sys.getenv("PIP_QA_URL") != "") - expect_error(build_url("qa", "pip", "v1")) + expect_error(build_base_url("qa", "pip", "v1")) skip_if(Sys.getenv("PIP_DEV_URL") != "") - expect_error(build_url("dev", "pip", "v1")) + expect_error(build_base_url("dev", "pip", "v1")) }) -test_that("build_url() works for internal URLS", { +test_that("build_base_url() works for internal URLS", { # Check internal URLs skip_if(Sys.getenv("PIPR_RUN_LOCAL_TESTS") != "TRUE") - x <- build_url("qa", "pip", "v1") + x <- build_base_url("qa", "pip", "v1") expect_identical(x, paste0(Sys.getenv("PIP_QA_URL"), "/v1/pip")) - x <- build_url("dev", "pip", "v1") + x <- build_base_url("dev", "pip", "v1") expect_identical(x, paste0(Sys.getenv("PIP_DEV_URL"), "/v1/pip")) }) -test_that("build_url() throws error for internal URLs if ENV vars are not found", { +test_that("build_base_url() throws error for internal URLs if ENV vars are not found", { # Expect error if ENV vars are not found skip_if(Sys.getenv("PIP_QA_URL") != "") - expect_error(build_url("qa", "pip", "v1")) + expect_error(build_base_url("qa", "pip", "v1")) skip_if(Sys.getenv("PIP_DEV_URL") != "") - expect_error(build_url("dev", "pip", "v1")) + expect_error(build_base_url("dev", "pip", "v1")) }) test_that("build_args() works for all individual parameters", { @@ -204,7 +199,7 @@ test_that("parse_response() works for different formats", { res <- parse_response(res_ex_json, simplify = FALSE) expect_identical(names(res), c("url", "status", "type", "content", "response")) expect_identical(class(res), "pip_api") - expect_identical(class(res$response), "response") + expect_identical(class(res$response), "httr2_response") expect_identical(class(res$content), "data.frame") # csv @@ -213,7 +208,7 @@ test_that("parse_response() works for different formats", { res <- parse_response(res_ex_csv, simplify = FALSE) expect_identical(names(res), c("url", "status", "type", "content", "response")) expect_identical(class(res), "pip_api") - expect_identical(class(res$response), "response") + expect_identical(class(res$response), "httr2_response") expect_true(all(class(res$content) %in% c("spec_tbl_df", "tbl_df", "tbl", "data.frame"))) # rds @@ -222,7 +217,7 @@ test_that("parse_response() works for different formats", { res <- parse_response(res_ex_rds, simplify = FALSE) expect_identical(names(res), c("url", "status", "type", "content", "response")) expect_identical(class(res), "pip_api") - expect_identical(class(res$response), "response") + expect_identical(class(res$response), "httr2_response") expect_true(all(class(res$content) %in% c("data.table", "data.frame"))) }) diff --git a/tests/testthat/testdata/res-ex-csv.RDS b/tests/testthat/testdata/res-ex-csv.RDS index f40b1e2..a445038 100644 Binary files a/tests/testthat/testdata/res-ex-csv.RDS and b/tests/testthat/testdata/res-ex-csv.RDS differ diff --git a/tests/testthat/testdata/res-ex-json.RDS b/tests/testthat/testdata/res-ex-json.RDS index 23ed9b3..868b576 100644 Binary files a/tests/testthat/testdata/res-ex-json.RDS and b/tests/testthat/testdata/res-ex-json.RDS differ diff --git a/tests/testthat/testdata/res-ex-rds.RDS b/tests/testthat/testdata/res-ex-rds.RDS index 6bbeed4..3ed461e 100644 Binary files a/tests/testthat/testdata/res-ex-rds.RDS and b/tests/testthat/testdata/res-ex-rds.RDS differ diff --git a/vignettes/articles/get-cp-example.Rmd b/vignettes/articles/get-cp-example.Rmd new file mode 100644 index 0000000..4652cb1 --- /dev/null +++ b/vignettes/articles/get-cp-example.Rmd @@ -0,0 +1,86 @@ +--- +title: "Get Country Profiles" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{get-cp-example} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE, message=FALSE, warning=FALSE} +NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true") +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + purl = NOT_CRAN +) +``` + +```{r} +library(pipr) +``` + +## Overview + +The `get_cp()` and `get_cp_ki()` functions provide **country profile data**, also available in [PIP's Country Profile page](https://pip.worldbank.org/country-profiles/IDN). This dataset comes from the [Global Monitoring Indicators (GMI)](https://datacatalog.worldbank.org/int/search/dataset/0065517/global-monitoring-indicators--gmi-), a set of harmonized indicators produced from the Global Monitoring Database (GMD) [^1] + +[^1]: The Global Monitoring Database (GMD) is the World Bank’s repository of multi-topic income and expenditure household surveys used to monitor global poverty and shared prosperity. The data comes from household surveys collected by the national statistical office in each country. It is then compiled, processed, and harmonized, so that levels and trends in poverty and other key socio-demographic attributes can be compared across and within countries over time. + +The country profile variables shown by the country profile page (and returned by default by `get_cp_ki` and `get_cp()`) are for the latest available year or comparable spell of years, including a **default poverty line** (\$2.15), and a **default PPP version** (2017). + +## Country Profile Key Indicators + +### Overview + +`get_cp_ki()` returns the key indicators listed at the beginning of the country profile page. These indicators are: + +- `headcount`: Poverty rate at the default international poverty line (% population). +- `headcount_national`: Poverty rate at the national poverty line (% population). +- `mpm_headcount`: Multidimensional poverty headcount ratio (% population). +- `share_below_40`: Annualized growth in per capita mean consumption or income (%) for the bottom 40%. +- `share_total`: Annualized growth in per capita mean consumption or income (%) (total). +- `pop`: Population, total (millions). +- `gni_pc`: GNI per capita, Atlas method (current US\$). +- `gdp_growth`GDP growth (annual %). + +### Usage + +`get_cp_ki` works for single countries only, and returns a data frame with the requested data. + +```{r} +get_cp_ki(country = "IDN") +# Note: get_cp_ki(country = c("IDN", "ITA")) # this won't work! +``` + +As a default, the function returns the latest available year or comparable spell of years, with poverty metrics calculated at the latest international poverty line of \$2.15 and the default PPP version of 2017. The user can change these by passing the respective arguments to the function. + +```{r} +get_cp_ki(country = "IDN", povline = 1.9) + +``` + +## Country Profile Poverty Indicators + +### Overview + +`get_cp()` returns a data frame with the data used to produce the country profile page poverty and inequality metrics visualizations. There are 47 variables included within the following categories: + +- **poverty headcounts**: poverty headcounts at different poverty lines, calculated for all years available. +- **inequality metrics**: Gini and Thiel Indexes, calculated for all years available. +- **shares of given demographic in bottom 40%/top 60%**: calculated for the latest available year. +- **multidimensional poverty indexes**: calculated for all years available. + +### Usage + +`get_cp()` default returns all data available (`country = all`) for the default poverty line (\$2.15) and PPP version (2017). If the user specifies `ppp_version = 2011`, the default poverty line will be \$1.9. + +```{r eval=FALSE} +get_cp() # all countries, default poverty line (2.15) and PPP version (2017) +get_cp(ppp_version = 2011) # all countries, poverty line 1.9. +``` + +The user can also specify a single country or a list of countries. The returned object will be a data frame with the requested data. + +```{r} +get_cp(country = "IDN") # single country +``` diff --git a/vignettes/articles/get-gd-example.Rmd b/vignettes/articles/get-gd-example.Rmd new file mode 100644 index 0000000..a1f135e --- /dev/null +++ b/vignettes/articles/get-gd-example.Rmd @@ -0,0 +1,209 @@ +--- +title: "Get Grouped Data Parameters" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{get-gd-example} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE, message=FALSE, warning=FALSE} +NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true") +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + purl = NOT_CRAN +) +``` + +```{r setup, message=FALSE, warning=FALSE} +library(pipr) +library(ggplot2) +``` + +The `get_gd()` function allows users to interact with the PIP API to retrieve grouped data statistics based on cumulative welfare and population data. Depending on the chosen `estimate` parameter, it can retrieve: + +- poverty and inequality statistics (`estimate = "stats"`). +- Lorenz curve data points (`estimate = "lorenz"`). +- regression parameters used of the Lorenz curve estimation (`estimate = "params"`). + +Here are a few examples to get you started: + +## Retrieve poverty and inequality statistics for grouped data + +By default, `get_gd()` returns grouped statistics (`estimate = "stats"`) based on cumulative welfare (`cum_welfare`) and population values (`cum_population`), both expressed as percentages. The default mean (`requested_mean`) and poverty line (`povline`) are set to 1, so the user should specify the known mean of the distribution, and the desired poverty line. + +The data used in this example is from [(Datt (1998))](https://ageconsearch.umn.edu/record/94862/?v=pdf). Among other consumption survey variables, The dataset lists the cumulative welfare(`L`, for Lorenz) and population values(`p`) for rural India in 1983, expressed in shares (percentages). The mean of the distribution is 109.9 Rs (daily), and the poverty line at the time was 89 Rs. Note that the cumulative welfare and population values should be monotonically increasing and sum to 1 to be valid. Additionally, the cumulative population values should always be greater or equal to the corresponding welfare values. + +```{r} +datt_mean <- 109.9 + +datt_povline <- 89 + +str(datt_rural) +``` + +### Get headcount from poverty line + +To retrieve basic grouped statistics, you need to provide cumulative welfare and population values along with the requested mean and poverty line. +```{r, warning=FALSE, message=FALSE} +get_gd( + cum_welfare = datt_rural$L, + cum_population = datt_rural$p, + estimate = "stats", + requested_mean = datt_mean, + povline = datt_povline + ) +``` + +### Get poverty line from headcount (`popshare`) + +As an alternative, instead of the mean, you can provide the population share (`popshare`), which will be assumed equal to the poverty headcount ratio, and used to calculate the rest of the statistics (and the poverty line itself): +```{r, warning=FALSE, message=FALSE} +get_gd( + cum_welfare = datt_rural$L, + cum_population = datt_rural$p, + estimate = "stats", + requested_mean = datt_mean, + popshare = 0.3 + ) + +``` + + +## Retrieve Lorenz curve data + +To retrieve Lorenz curve data, you can specify `estimate = "lorenz"` and provide the number of bins (`n_bins`) to return (there is no default value for `n_bins`). The Lorenz curve will be estimated with both the Beta Lorenz and Quadratic Lorenz methodologies, then the best one will be selected by default. + +```{r, warning=FALSE, message=FALSE} +lorenz_100 <- get_gd( + cum_welfare = datt_rural$L, + cum_population = datt_rural$p, + estimate = "lorenz", + n_bins = 10, + server = "dev" +) + +str(lorenz_100) +``` + +You can also specify the Lorenz curve methodology by setting the `lorenz` parameter to either `"lb"` (Beta Lorenz) or `"lq"` (Quadratic Lorenz). + +```{r, warning=FALSE, message=FALSE} +get_gd( + cum_welfare = datt_rural$L, + cum_population = datt_rural$p, + estimate = "lorenz", + lorenz = "lb", + n_bins = 10, + server = "dev" +) +``` + + +## Retrieve regression parameters + +Finally, you can retrieve the regression parameters used for the Lorenz curve estimation by setting `estimate = "params"`. The methods used, both the Beta Lorenz and the Quadratic Lorenz, are described in detail in Datt (1998). + +```{r, warning=FALSE, message=FALSE} +get_gd( + cum_welfare = datt_rural$L, + cum_population = datt_rural$p, + estimate = "params" +) +``` + +The variable `selected_for_dist` shows the Lorenz curve methodology selected by default when calculating the Lorenz curve data points and the deciles. The variable `selected_for_pov` shows the Lorenz curve methodology selected by default when calculating the poverty and inequality statistics. + +## Use cases + +We now show three examples of how the `get_gd()` function could be integrated in a workflow to analyze and visualize group data. + +### Poverty Line vs Poverty Measures +```{r, warning=FALSE, message=FALSE} +datt_mean <- 109.9 + +datt_povline <- 89 + +# Define a sequence of poverty lines +poverty_lines <- seq(50, 150, by = 5) + +# Initialize a data frame to store results +poverty_stats <- data.frame() + +# Loop over poverty lines to compute poverty measures +for (pl in poverty_lines) { + stats <- get_gd( + cum_welfare = datt_rural$L, + cum_population = datt_rural$p, + estimate = "stats", + requested_mean = 109.9, + povline = pl + ) + poverty_stats <- rbind(poverty_stats, data.frame(poverty_line = pl, headcount = stats$headcount, poverty_gap = stats$poverty_gap, poverty_severity = stats$poverty_severity)) +} + +# Plotting +ggplot(poverty_stats, aes(x = poverty_line)) + + geom_line(aes(y = headcount, color = "Headcount Ratio")) + + geom_line(aes(y = poverty_gap, color = "Poverty Gap")) + + geom_line(aes(y = poverty_severity, color = "Poverty Severity")) + + geom_vline(xintercept = datt_povline, linetype = "dashed", color = "black") + + geom_text(aes(x = datt_povline, y = 0.8, label = "Official poverty line (89 Rs)"), hjust = 1.1, vjust = 0) + + geom_vline(xintercept = datt_mean, color = "black") + + geom_text(aes(x = datt_mean, y = 0.8, label = "Mean (109.9 Rs)"), hjust = -.1) + + labs( + title = "Poverty Measures vs. Poverty Line, Rural India 1983", + x = "Poverty Line", + y = "Poverty Measure (FGT indices)", + color = "Poverty Measure" + ) + + theme_minimal() + + theme(legend.position = "bottom", + legend.title = element_blank()) +``` + +### Lorenz Curve Data +```{r, warning=FALSE, message=FALSE} +lorenz_points_lq <- get_gd(datt_rural$L, datt_rural$p, estimate = "lorenz", lorenz = "lq") +lorenz_points_lq_10 <- get_gd(datt_rural$L, datt_rural$p, estimate = "lorenz", lorenz = "lq", n_bins = 10) + +ggplot() + + geom_bar(data = lorenz_points_lq_10, aes(x = weight, y = welfare), stat = "identity", fill = "darkorange", alpha = 0.3) + + geom_point(data = datt_rural, aes(x = p, y = L), color = "darkorange", size = 2) + + geom_point(data = lorenz_points_lq, aes(x = weight, y = welfare), color = "darkorange", size = 0.5) + + geom_abline(intercept = 0, slope = 1, color = "black") + + labs( + title = "Lorenz Curve for Rural India, 1983", + x = "Cumulative Share of Population", + y = "Cumulative Share of Welfare" + ) + + theme_minimal() +``` + + +### Rural vs Urban +```{r, warning=FALSE, message=FALSE} +lorenz_rural <- get_gd(datt_rural$L, datt_rural$p, estimate = "lorenz", lorenz = "lq", n_bins = 1000) +lorenz_urban <- get_gd(datt_urban$L, datt_urban$p, estimate = "lorenz", lorenz = "lq", n_bins = 1000) + +ggplot() + + geom_point(data = lorenz_rural, aes(x = weight, y = welfare), color = "darkorange", size = 0.5) + + geom_point(data = lorenz_urban, aes(x = weight, y = welfare), color = "steelblue", size = 0.5) + + geom_abline(intercept = 0, slope = 1, color = "black") + + labs( + title = "Lorenz Curve for India, 1983", + x = "Cumulative Share of Population", + y = "Cumulative Share of Welfare" + ) + + theme_minimal() +``` + +## References + +- Datt, Gaurav (1998). "Computational tools for poverty measurement and analysis." FCND Discussion Paper 50. International Food Policy Research Institute (IFPRI). Washington, DC. [Link](https://www.ifpri.org/publication/computational-tools-poverty-measurement-and-analysis) + +- Sarveskhana, Vol IX n 4 (April 1986). [Link](https://mospi.gov.in/sites/default/files/publication_reports/Sarvekshana_issue%20no.%2027_%20Vol%20%209%20No%204.PDF) + + +