Skip to content

Commit

Permalink
Merge pull request #5 from WASHNote/2022-01-update
Browse files Browse the repository at this point in the history
2022 01 update --> v.0.1.4
  • Loading branch information
nickdickinson authored Feb 2, 2023
2 parents 532a067 + 23e6a85 commit c1df373
Show file tree
Hide file tree
Showing 22 changed files with 38 additions and 11 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: jmpwashdata
Type: Package
Title: WHO/UNICEF Joint Monitoring Programme Water and Sanitation Data
Version: 0.1.3.9000
Version: 0.1.4
Author: Nicolas Dickinson
Maintainer: Nicolas Dickinson <[email protected]>
Description: As a convenience, this package reproduces a snapshot of the JMP WASH household data (currently as of July 2021), the WASH in schools data, and the WASH in health care facilities data, which are normally available as Excel sheets on https://washdata.org.
Expand Down
23 changes: 20 additions & 3 deletions data-raw/jmpdatadownload.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ library(stringr)


.refresh_data_files <- function() {
.remove_previous_data()

page <- .download_page()

wld_data <- .download_aggregate_files(page, target = "WLD")
Expand All @@ -28,6 +30,16 @@ library(stringr)
compress = "bzip2")
}

# Recursively delete the per-type download directories under data-raw/.
#
# Paths are relative to the current working directory. Directories that do
# not exist are skipped by unlink().
#
# Returns, invisibly, the status code of the final unlink() call
# (0 for success, 1 for failure).
.remove_previous_data <- function() {
  stale_dirs <- c(
    "data-raw/healthcare",
    "data-raw/household",
    "data-raw/inequalities",
    "data-raw/REG",
    "data-raw/schools",
    "data-raw/WLD"
  )

  status <- 0L
  # Remove the directories one by one, in the listed order, keeping only the
  # status of the most recent call.
  for (stale_dir in stale_dirs) {
    status <- unlink(stale_dir, recursive = TRUE)
  }

  invisible(status)
}


## Data is updated no more than annually as of 2021-06-22
# target = "WLD" or "REG"
.download_aggregate_files <- function(page, target = "WLD") {
Expand All @@ -38,10 +50,11 @@ library(stringr)
.collect_metadata <- function(links) {
lapply(seq_along(links), function(i, x) {
.wait_approx(.1)
message("Getting filename for ", x[[i]])
jmp_excel <- HEAD("https://washdata.org/", path=x[[i]])
filename = str_extract(
jmp_excel$headers$`content-disposition`,
'(?<=").{1,128}(?=")'
'(?<=attachment; filename\\=).{1,128}(?=)'
)
type = str_extract(
x[[i]],
Expand All @@ -64,12 +77,16 @@ library(stringr)
files <- .collect_metadata(links)
lapply(files, function(x) {
print(x[1,"filename"])
print(paste0("https://washdata.org",
x[1,"path"]))
target_dir <- paste0("data-raw/", if (is.na(folder)) x[1, "type"] else folder)
if (!dir.exists(target_dir)) dir.create(target_dir)
.wait_approx(0.5)
print(jmp_excel <- RETRY("GET", paste0("https://washdata.org",
x[1,"path"]),
write_disk(paste0("data-raw/", if (is.na(folder)) x[1, "type"] else folder,"/",x[1,"filename"]), overwrite = overwrite),
write_disk(path = paste0(target_dir,"/",x[1,"filename"]), overwrite = overwrite),
verbose()
))
), timeout(1))
})
bind_rows(files)
}
Expand Down
24 changes: 17 additions & 7 deletions data-raw/jmpdataextract.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
library(rio)
library(dplyr)
library(tidyr)
library(futile.logger)
library(tryCatchLog)

load("data/jmp_files.rda")

Expand Down Expand Up @@ -130,7 +132,7 @@ var_attr <- function(x, attr_name, unlist = FALSE) {
message = c(names(warnings()), error_txt),
message_type = c(rep("warning", times = length(names(warnings()))), rep("error", times = length(error_txt)))
)
assign("last.warning", NULL, envir = baseenv())
tryCatch(assign("last.warning", NULL, envir = baseenv()), warning = function(cond) {invisible()})
}) %>% bind_rows()

usethis::use_data(
Expand Down Expand Up @@ -227,7 +229,7 @@ var_attr <- function(x, attr_name, unlist = FALSE) {
jmp_household_watsan_sources <- lapply(countries$geo, function(x) {
hh_path <- paste0("data-raw/household/", filter(jmp_files, geo == x, type == "household")$filename)

print(paste0("Watsan summary from: ", hh_path))
message(paste0("Watsan summary from: ", hh_path))

watsan_summary_data <- readxl::read_excel(hh_path, sheet = "Chart Data", range="A5:CL208", col_names = TRUE, col_types = c(rep("text", 2), rep("numeric", 88)))
watsan_summary_data <- watsan_summary_data %>% filter(if_any(everything(), ~ (!is.na(.)&.!=0)))
Expand All @@ -245,7 +247,7 @@ var_attr <- function(x, attr_name, unlist = FALSE) {
jmp_household_hygiene_sources <- lapply(countries$geo, function(x) {
hh_path <- paste0("data-raw/household/", filter(jmp_files, geo == x, type == "household")$filename)

print(paste0("Hygiene summary from: ", hh_path))
message(paste0("Hygiene summary from: ", hh_path))

hyg_summary_data <- readxl::read_excel(hh_path, sheet = "Chart Data", range="CM5:CU208", col_names = TRUE, col_types = c(rep("text", 2), rep("numeric", 7)))
hyg_summary_data <- hyg_summary_data %>% filter(if_any(everything(), ~ (!is.na(.)&.!=0)))
Expand All @@ -271,6 +273,8 @@ var_attr <- function(x, attr_name, unlist = FALSE) {
### Procedure to extract inequality data

.extract_inequalities_estimate_data <- function() {
message("--- Starting to extract inequality estimate data ---")

countries <- jmp_files %>% filter(type == "inequalities", !(geo %in% c("WLD", "REG")))

use_data <- usethis::use_data
Expand All @@ -279,12 +283,12 @@ var_attr <- function(x, attr_name, unlist = FALSE) {
lapply(c("water", "sanitation"), function(service_type) {
dataset_name <- paste0("jmp_inequality_",service_type,"_estimate")

print(dataset_name)
message(dataset_name)

dataset <- lapply(countries$geo, function(x) {
ineq_path <- paste0("data-raw/inequalities/", filter(countries, geo == x)$filename)

print(ineq_path)
message(sprintf("Extracting from %s", ineq_path))

.get_watsan_quintile_estimates(
ineq_path = ineq_path,
Expand All @@ -302,6 +306,8 @@ var_attr <- function(x, attr_name, unlist = FALSE) {
}

.extract_inequalities_region_data <- function(verbose = FALSE) {
message("--- Starting to extract inequality region data ---")

countries <- jmp_files %>% filter(type == "inequalities", !(geo %in% c("WLD", "REG")))
#%>% filter(geo == "NPL")

Expand Down Expand Up @@ -335,6 +341,8 @@ var_attr <- function(x, attr_name, unlist = FALSE) {
}

.extract_inequalities_source_data <- function() {
message("--- Starting to extract source data ---")

countries <- jmp_files %>% filter(type == "inequalities", !(geo %in% c("WLD", "REG")))
#%>% slice_head(n = 2)

Expand Down Expand Up @@ -368,6 +376,8 @@ var_attr <- function(x, attr_name, unlist = FALSE) {


.extract_inequalities_data_summary <- function() {
message("--- Starting to extract inequality data summary ---")

countries <- jmp_files %>% filter(type == "inequalities", !(geo %in% c("WLD", "REG"))) #%>% slice_head(n = 2)

use_data <- usethis::use_data
Expand Down Expand Up @@ -557,14 +567,14 @@ var_attr <- function(x, attr_name, unlist = FALSE) {

# for later - would be more readable to name the residence ranges
lapply(1:3, function(x, ranges) {
message(x)
message(sprintf("ranges$residence[[%d]]", x))
quin_vars <- suppressMessages(
readxl::read_excel(ineq_path, sheet = sheet, range=ranges$residence[[x]], col_names = TRUE)
) %>%
.estimate_quintile_vars(iso3)

lapply(1:5, function(y, quintile_list) {
print(y*1000)
message(sprintf("quintile %d", y))
df_quin <- suppressMessages(
readxl::read_excel(ineq_path, sheet = sheet, range=as.character(quintile_list[y]), col_names = TRUE)
)
Expand Down
Binary file modified data/jmp_extraction_messages.rda
Binary file not shown.
Binary file modified data/jmp_files.rda
Binary file not shown.
Binary file modified data/jmp_healthcare_reg_env_cleaning.rda
Binary file not shown.
Binary file modified data/jmp_healthcare_reg_hygiene.rda
Binary file not shown.
Binary file modified data/jmp_healthcare_reg_sanitation.rda
Binary file not shown.
Binary file modified data/jmp_healthcare_reg_waste_man.rda
Binary file not shown.
Binary file modified data/jmp_healthcare_reg_water.rda
Binary file not shown.
Binary file modified data/jmp_healthcare_wld_env_cleaning.rda
Binary file not shown.
Binary file modified data/jmp_healthcare_wld_hygiene.rda
Binary file not shown.
Binary file modified data/jmp_healthcare_wld_sanitation.rda
Binary file not shown.
Binary file modified data/jmp_healthcare_wld_waste_man.rda
Binary file not shown.
Binary file modified data/jmp_healthcare_wld_water.rda
Binary file not shown.
Binary file modified data/jmp_household_watsan_sources.rda
Binary file not shown.
Binary file modified data/jmp_schools_reg_hygiene.rda
Binary file not shown.
Binary file modified data/jmp_schools_reg_sanitation.rda
Binary file not shown.
Binary file modified data/jmp_schools_reg_water.rda
Binary file not shown.
Binary file modified data/jmp_schools_wld_hygiene.rda
Binary file not shown.
Binary file modified data/jmp_schools_wld_sanitation.rda
Binary file not shown.
Binary file modified data/jmp_schools_wld_water.rda
Binary file not shown.

0 comments on commit c1df373

Please sign in to comment.