From 1011c5a9ed6e967895783d5ea4181ba630f88966 Mon Sep 17 00:00:00 2001 From: William Curran-groome Date: Sun, 7 Apr 2024 14:30:15 -0400 Subject: [PATCH] updating generate_codebook() --- R/generate_codebook.R | 9 ++++----- tests/testthat/test-generate_codebook.R | 25 ++++++++++++------------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/R/generate_codebook.R b/R/generate_codebook.R index 910a72f..89468e3 100644 --- a/R/generate_codebook.R +++ b/R/generate_codebook.R @@ -22,9 +22,8 @@ generate_codebook = function(.data) { ####----Variable Crosswalk----#### - list_acs_expression = ({ - expression_list = rlang::enexpr(list_acs_variables) %>% as.list() - subset_expression = expression_list[[2]][[4]][[3]] }) + expression_list = rlang::enexpr(list_acs_variables) %>% as.list() + list_acs_expression = expression_list[[2]][[4]][[3]] ## this covers all the manually named variables, does not include those selected via select_variables() variable_crosswalk = list_acs_expression %>% @@ -131,7 +130,7 @@ generate_codebook = function(.data) { dplyr::select(c( dplyr::matches(positive_matches), -dplyr::matches(negative_matches), - -dplyr::matches("percent$"), + -dplyr::matches("percent$|_M$"), dplyr::all_of(positive_columns), -dplyr::all_of(negative_columns))) %>% colnames @@ -139,7 +138,7 @@ generate_codebook = function(.data) { } else { input_columns = .data %>% dplyr::select(dplyr::matches(selection_term)) %>% - dplyr::select(-dplyr::matches("percent$")) %>% + dplyr::select(-dplyr::matches("percent$|_M$")) %>% colnames } output_column_naming_syntax = across_call %>% diff --git a/tests/testthat/test-generate_codebook.R b/tests/testthat/test-generate_codebook.R index 60e1d25..2dfe68a 100644 --- a/tests/testthat/test-generate_codebook.R +++ b/tests/testthat/test-generate_codebook.R @@ -1,22 +1,21 @@ ####----Load Test Data----#### ## Statistics for NJ Counties -df_test = urbnindicators::compile_acs_data( +df = urbnindicators::compile_acs_data( variables = urbnindicators::list_acs_variables(year = "2022"), years = 2022, geography = "county", states = "NJ", counties = NULL, retain_moes = TRUE, - spatial = FALSE) %>% - dplyr::select(-dplyr::matches("_M$")) + spatial = FALSE) -results = generate_codebook(.data = df_test) +codebook = attr(df, "codebook") #####----TESTING----##### ## No missingness in codebook - results_missingness = results %>% + results_missingness = codebook %>% dplyr::filter(dplyr::if_any(.cols = dplyr::everything(), ~ is.na(.x))) %>% nrow @@ -25,7 +24,7 @@ results = generate_codebook(.data = df_test) { testthat::expect_equal(results_missingness, 0) } ) ## No transcribed function calls - results_transcribed_functions = results %>% + results_transcribed_functions = codebook %>% dplyr::filter(dplyr::if_any(.cols = dplyr::everything(), ~ stringr::str_detect(.x, "dplyr"))) %>% nrow @@ -34,7 +33,7 @@ results = generate_codebook(.data = df_test) { testthat::expect_equal(results_transcribed_functions, 0) } ) ## No missing raw variable codes - results_missing_raw_variables = results %>% + results_missing_raw_variables = codebook %>% dplyr::filter(dplyr::if_any(.cols = dplyr::everything(), ~ stringr::str_detect(.x, "\\(\\)|\\(NA\\)"))) %>% nrow @@ -43,7 +42,7 @@ results = generate_codebook(.data = df_test) { testthat::expect_equal(results_missing_raw_variables, 0) } ) ## No universe variables in numerators (except population density) - results_universe_numerators = results %>% + results_universe_numerators = codebook %>% dplyr::filter(stringr::str_detect(definition, "Numerator.*universe.*Denominator")) %>% nrow @@ -52,7 +51,7 @@ results = generate_codebook(.data = df_test) { testthat::expect_equal(results_universe_numerators, 1) } ) ## No definitions for variables that are percentages of universes (not possible) - results_universe_percentages = results %>% + results_universe_percentages = codebook %>% dplyr::filter(stringr::str_detect(calculated_variable, "universe.*percent$")) %>% nrow @@ -61,8 +60,8 @@ results = generate_codebook(.data = df_test) { testthat::expect_equal(results_universe_percentages, 0) } ) ## No codebook variable definitions that are missing from the input dataset - results_phantom_definitions = results %>% - dplyr::filter(!(calculated_variable %in% (df_test %>% colnames))) %>% + results_phantom_definitions = codebook %>% + dplyr::filter(!(calculated_variable %in% (df %>% colnames))) %>% nrow testthat::test_that( @@ -70,8 +69,8 @@ results = generate_codebook(.data = df_test) { testthat::expect_equal(results_phantom_definitions, 0) } ) ## All variables in the input data are in the codebook -derived_variables = df_test %>% dplyr::select(dplyr::matches("percent$")) %>% colnames -undefined_variables = derived_variables[!(derived_variables %in% (results %>% dplyr::pull(calculated_variable)))] +derived_variables = df %>% dplyr::select(dplyr::matches("percent$")) %>% colnames +undefined_variables = derived_variables[!(derived_variables %in% (codebook %>% dplyr::pull(calculated_variable)))] testthat::test_that( "All variables in the input data are in the codebook.",