Merge pull request #28 from UI-Research/codebook

updating generate_codebook()
UI-Research · Apr 7, 2024 · 9b9f82e · 9b9f82e
2 parents 8c0c493 + 1011c5a
commit 9b9f82e
Show file tree

Hide file tree

Showing 2 changed files with 16 additions and 18 deletions.
diff --git a/R/generate_codebook.R b/R/generate_codebook.R
@@ -22,9 +22,8 @@
 generate_codebook = function(.data)  {
 
     ####----Variable Crosswalk----####
-    list_acs_expression = ({
-      expression_list = rlang::enexpr(list_acs_variables) %>% as.list()
-      subset_expression = expression_list[[2]][[4]][[3]] })
+    expression_list = rlang::enexpr(list_acs_variables) %>% as.list()
+    list_acs_expression = expression_list[[2]][[4]][[3]]
 
     ## This covers all the manually named variables; it does not include those selected via select_variables().
     variable_crosswalk = list_acs_expression %>%
@@ -131,15 +130,15 @@ generate_codebook = function(.data)  {
           dplyr::select(c(
             dplyr::matches(positive_matches),
             -dplyr::matches(negative_matches),
-            -dplyr::matches("percent$"),
+            -dplyr::matches("percent$|_M$"),
             dplyr::all_of(positive_columns),
             -dplyr::all_of(negative_columns))) %>%
           colnames
 
       } else {
         input_columns = .data %>%
           dplyr::select(dplyr::matches(selection_term)) %>%
-          dplyr::select(-dplyr::matches("percent$")) %>%
+          dplyr::select(-dplyr::matches("percent$|_M$")) %>%
           colnames }
 
       output_column_naming_syntax = across_call %>%

diff --git a/tests/testthat/test-generate_codebook.R b/tests/testthat/test-generate_codebook.R
@@ -1,22 +1,21 @@
 ####----Load Test Data----####
 
 ## Statistics for NJ Counties
-df_test = urbnindicators::compile_acs_data(
+df = urbnindicators::compile_acs_data(
   variables = urbnindicators::list_acs_variables(year = "2022"),
   years = 2022,
   geography = "county",
   states = "NJ",
   counties = NULL,
   retain_moes = TRUE,
-  spatial = FALSE) %>%
-  dplyr::select(-dplyr::matches("_M$"))
+  spatial = FALSE)
 
-results = generate_codebook(.data = df_test)
+codebook = attr(df, "codebook")
 
 #####----TESTING----#####
 
 ## No missingness in codebook
-  results_missingness = results %>%
+  results_missingness = codebook %>%
     dplyr::filter(dplyr::if_any(.cols = dplyr::everything(), ~ is.na(.x))) %>%
     nrow
 
@@ -25,7 +24,7 @@ results = generate_codebook(.data = df_test)
     { testthat::expect_equal(results_missingness, 0) } )
 
 ## No transcribed function calls
-  results_transcribed_functions = results %>%
+  results_transcribed_functions = codebook %>%
     dplyr::filter(dplyr::if_any(.cols = dplyr::everything(), ~ stringr::str_detect(.x, "dplyr"))) %>%
     nrow
 
@@ -34,7 +33,7 @@ results = generate_codebook(.data = df_test)
     { testthat::expect_equal(results_transcribed_functions, 0) } )
 
 ## No missing raw variable codes
-  results_missing_raw_variables = results %>%
+  results_missing_raw_variables = codebook %>%
     dplyr::filter(dplyr::if_any(.cols = dplyr::everything(), ~ stringr::str_detect(.x, "\\(\\)|\\(NA\\)"))) %>%
     nrow
 
@@ -43,7 +42,7 @@ results = generate_codebook(.data = df_test)
     { testthat::expect_equal(results_missing_raw_variables, 0) } )
 
 ## No universe variables in numerators (except population density)
-  results_universe_numerators = results %>%
+  results_universe_numerators = codebook %>%
     dplyr::filter(stringr::str_detect(definition, "Numerator.*universe.*Denominator")) %>%
     nrow
 
@@ -52,7 +51,7 @@ results = generate_codebook(.data = df_test)
     { testthat::expect_equal(results_universe_numerators, 1) } )
 
 ## No definitions for variables that are percentages of universes (not possible)
-  results_universe_percentages = results %>%
+  results_universe_percentages = codebook %>%
     dplyr::filter(stringr::str_detect(calculated_variable, "universe.*percent$")) %>%
     nrow
 
@@ -61,17 +60,17 @@ results = generate_codebook(.data = df_test)
     { testthat::expect_equal(results_universe_percentages, 0) } )
 
 ## No codebook variable definitions that are missing from the input dataset
-  results_phantom_definitions = results %>%
-    dplyr::filter(!(calculated_variable %in% (df_test %>% colnames))) %>%
+  results_phantom_definitions = codebook %>%
+    dplyr::filter(!(calculated_variable %in% (df %>% colnames))) %>%
     nrow
 
   testthat::test_that(
     "No codebook entries for variables that don't exist in the input data.",
     { testthat::expect_equal(results_phantom_definitions, 0) } )
 
 ## All variables in the input data are in the codebook
-derived_variables = df_test %>% dplyr::select(dplyr::matches("percent$")) %>% colnames
-undefined_variables = derived_variables[!(derived_variables %in% (results %>% dplyr::pull(calculated_variable)))]
+derived_variables = df %>% dplyr::select(dplyr::matches("percent$")) %>% colnames
+undefined_variables = derived_variables[!(derived_variables %in% (codebook %>% dplyr::pull(calculated_variable)))]
 
 testthat::test_that(
   "All variables in the input data are in the codebook.",