Skip to content

Commit

Permalink
Merge pull request #28 from UI-Research/codebook
Browse files Browse the repository at this point in the history
updating generate_codebook()
  • Loading branch information
wcurrangroome authored Apr 7, 2024
2 parents 8c0c493 + 1011c5a commit 9b9f82e
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 18 deletions.
9 changes: 4 additions & 5 deletions R/generate_codebook.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@
generate_codebook = function(.data) {

####----Variable Crosswalk----####
list_acs_expression = ({
expression_list = rlang::enexpr(list_acs_variables) %>% as.list()
subset_expression = expression_list[[2]][[4]][[3]] })
expression_list = rlang::enexpr(list_acs_variables) %>% as.list()
list_acs_expression = expression_list[[2]][[4]][[3]]

## This covers all the manually named variables; it does not include those selected via select_variables().
variable_crosswalk = list_acs_expression %>%
Expand Down Expand Up @@ -131,15 +130,15 @@ generate_codebook = function(.data) {
dplyr::select(c(
dplyr::matches(positive_matches),
-dplyr::matches(negative_matches),
-dplyr::matches("percent$"),
-dplyr::matches("percent$|_M$"),
dplyr::all_of(positive_columns),
-dplyr::all_of(negative_columns))) %>%
colnames

} else {
input_columns = .data %>%
dplyr::select(dplyr::matches(selection_term)) %>%
dplyr::select(-dplyr::matches("percent$")) %>%
dplyr::select(-dplyr::matches("percent$|_M$")) %>%
colnames }

output_column_naming_syntax = across_call %>%
Expand Down
25 changes: 12 additions & 13 deletions tests/testthat/test-generate_codebook.R
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
####----Load Test Data----####

## Statistics for NJ Counties
df_test = urbnindicators::compile_acs_data(
df = urbnindicators::compile_acs_data(
variables = urbnindicators::list_acs_variables(year = "2022"),
years = 2022,
geography = "county",
states = "NJ",
counties = NULL,
retain_moes = TRUE,
spatial = FALSE) %>%
dplyr::select(-dplyr::matches("_M$"))
spatial = FALSE)

results = generate_codebook(.data = df_test)
codebook = attr(df, "codebook")

#####----TESTING----#####

## No missingness in codebook
results_missingness = results %>%
results_missingness = codebook %>%
dplyr::filter(dplyr::if_any(.cols = dplyr::everything(), ~ is.na(.x))) %>%
nrow

Expand All @@ -25,7 +24,7 @@ results = generate_codebook(.data = df_test)
{ testthat::expect_equal(results_missingness, 0) } )

## No transcribed function calls
results_transcribed_functions = results %>%
results_transcribed_functions = codebook %>%
dplyr::filter(dplyr::if_any(.cols = dplyr::everything(), ~ stringr::str_detect(.x, "dplyr"))) %>%
nrow

Expand All @@ -34,7 +33,7 @@ results = generate_codebook(.data = df_test)
{ testthat::expect_equal(results_transcribed_functions, 0) } )

## No missing raw variable codes
results_missing_raw_variables = results %>%
results_missing_raw_variables = codebook %>%
dplyr::filter(dplyr::if_any(.cols = dplyr::everything(), ~ stringr::str_detect(.x, "\\(\\)|\\(NA\\)"))) %>%
nrow

Expand All @@ -43,7 +42,7 @@ results = generate_codebook(.data = df_test)
{ testthat::expect_equal(results_missing_raw_variables, 0) } )

## No universe variables in numerators (except population density)
results_universe_numerators = results %>%
results_universe_numerators = codebook %>%
dplyr::filter(stringr::str_detect(definition, "Numerator.*universe.*Denominator")) %>%
nrow

Expand All @@ -52,7 +51,7 @@ results = generate_codebook(.data = df_test)
{ testthat::expect_equal(results_universe_numerators, 1) } )

## No definitions for variables that are percentages of universes (not possible)
results_universe_percentages = results %>%
results_universe_percentages = codebook %>%
dplyr::filter(stringr::str_detect(calculated_variable, "universe.*percent$")) %>%
nrow

Expand All @@ -61,17 +60,17 @@ results = generate_codebook(.data = df_test)
{ testthat::expect_equal(results_universe_percentages, 0) } )

## No codebook variable definitions that are missing from the input dataset
results_phantom_definitions = results %>%
dplyr::filter(!(calculated_variable %in% (df_test %>% colnames))) %>%
results_phantom_definitions = codebook %>%
dplyr::filter(!(calculated_variable %in% (df %>% colnames))) %>%
nrow

testthat::test_that(
"No codebook entries for variables that don't exist in the input data.",
{ testthat::expect_equal(results_phantom_definitions, 0) } )

## All derived variables in the input data (columns ending in "percent") are in the codebook
derived_variables = df_test %>% dplyr::select(dplyr::matches("percent$")) %>% colnames
undefined_variables = derived_variables[!(derived_variables %in% (results %>% dplyr::pull(calculated_variable)))]
derived_variables = df %>% dplyr::select(dplyr::matches("percent$")) %>% colnames
undefined_variables = derived_variables[!(derived_variables %in% (codebook %>% dplyr::pull(calculated_variable)))]

testthat::test_that(
"All variables in the input data are in the codebook.",
Expand Down

0 comments on commit 9b9f82e

Please sign in to comment.