UI-Research · jwalsh28 · Nov 8, 2024 · Dec 11, 2024 · Dec 19, 2024 · Dec 31, 2024
diff --git a/08_education/calculate-prek-county-2014.html b/08_education/calculate-prek-county-2014.html
diff --git a/08_education/calculate-prek-county-2016.html b/08_education/calculate-prek-county-2016.html
diff --git a/08_education/calculate-prek-county-2018.html b/08_education/calculate-prek-county-2018.html
diff --git a/08_education/calculate-prek-place-2014.html b/08_education/calculate-prek-place-2014.html
diff --git a/08_education/calculate-prek-place-2016.html b/08_education/calculate-prek-place-2016.html
diff --git a/08_education/calculate-prek-place-2018.html b/08_education/calculate-prek-place-2018.html
diff --git a/08_education/data/final/metrics_preschool_county_all_longitudinal.csv b/08_education/data/final/metrics_preschool_county_all_longitudinal.csv
diff --git a/08_education/data/final/metrics_preschool_county_all_longitudinal_all.csv b/08_education/data/final/metrics_preschool_county_all_longitudinal_all.csv
diff --git a/08_education/data/final/metrics_preschool_county_income_longitudinal.csv b/08_education/data/final/metrics_preschool_county_income_longitudinal.csv
diff --git a/08_education/data/final/metrics_preschool_county_race-ethnicity_longitudinal.csv b/08_education/data/final/metrics_preschool_county_race-ethnicity_longitudinal.csv
diff --git a/08_education/data/final/metrics_preschool_place_all_longitudinal_2014_16.csv b/08_education/data/final/metrics_preschool_place_all_longitudinal_2014_16.csv
diff --git a/08_education/data/final/metrics_preschool_place_all_longitudinal_all.csv b/08_education/data/final/metrics_preschool_place_all_longitudinal_all.csv
diff --git a/08_education/data/final/metrics_preschool_place_income_longitudinal.csv b/08_education/data/final/metrics_preschool_place_income_longitudinal.csv
diff --git a/08_education/data/final/metrics_preschool_place_race-ethnicity_longitudinal.csv b/08_education/data/final/metrics_preschool_place_race-ethnicity_longitudinal.csv
diff --git a/08_education/data/final/metrics_preschool_place_race-ethnicity_longitudinal_2014_16.csv b/08_education/data/final/metrics_preschool_place_race-ethnicity_longitudinal_2014_16.csv
diff --git a/08_education/preschool_county.html b/08_education/preschool_county.html
diff --git a/08_education/preschool_county.qmd b/08_education/preschool_county.qmd
diff --git a/08_education/preschool_place.html b/08_education/preschool_place.html
diff --git a/08_education/preschool_place.qmd b/08_education/preschool_place.qmd
diff --git a/08_education/run_preschool_years.qmd b/08_education/run_preschool_years.qmd
@@ -0,0 +1,98 @@
+---
+title: "Run preschool"
+date: today
+format: html
+execute: 
+  warning: false
+editor_options: 
+  chunk_output_type: console
+---
+
+## Housekeeping
+
+Import necessary libraries.
+
+```{r}
+library(quarto)
+library(here)
+
+```
+
+
+## Render QMD Years
+
+Select the years of data for which the preschool county and place calculation files should be rendered.
+
+```{r}
+
+# Years to render, as strings; each becomes the `year` execute parameter.
+years <- c("2014", "2016", "2018")
+```
+
+Wrap the Quarto render function so that the rendered output is written to the correct folder with the correct file name.
+
+```{r}
+#' Render a Quarto document, then move and/or rename its output
+#'
+#' Renders `input` with `quarto::quarto_render()` and afterwards copies or
+#' renames the rendered file so that it ends up at
+#' `file.path(output_dir, output_file)`.
+#'
+#' @param input Path to the .qmd file to render.
+#' @param output_file File name for the rendered output. `NULL` keeps the
+#'   output name reported by `quarto::quarto_inspect()`.
+#' @param output_dir Directory for the rendered output. `NULL` keeps the
+#'   output in the directory of `input`.
+#' @param ... Further arguments forwarded to `quarto::quarto_render()`
+#'   (for example `execute_params`).
+#' @return The final output path, invisibly. Stops with an error if the
+#'   rendered file cannot be copied or renamed.
+quarto_render_move <- function(
+    input,
+    output_file = NULL,
+    output_dir = NULL,
+    ...
+) {
+
+  # Get all the input / output file names and paths.
+  # Only the first declared format is used: indexing `formats` with several
+  # names at once would be treated by `[[` as a recursive index.
+  doc_info <- quarto::quarto_inspect(input)
+  output_format <- names(doc_info$formats)[[1]]
+  output <- doc_info$formats[[output_format]]$pandoc$`output-file`
+  if (is.null(output_file)) {
+    output_file <- output
+  }
+  input_dir <- dirname(input)
+  if (is.null(output_dir)) {
+    output_dir <- input_dir
+  }
+  output_path_from <- file.path(input_dir, output)
+  output_path_to <- file.path(output_dir, output_file)
+
+  # Render qmd file to input_dir
+  quarto::quarto_render(input = input, ...)
+
+  if (input_dir != output_dir) {
+    # The output belongs in another directory: copy it there, and delete the
+    # original only once the copy is known to have succeeded.
+    if (!dir.exists(output_dir)) {
+      dir.create(output_dir, recursive = TRUE)
+    }
+
+    copied <- file.copy(
+      from = output_path_from,
+      to = output_path_to,
+      overwrite = TRUE
+    )
+    if (!isTRUE(copied)) {
+      stop(
+        "Could not copy '", output_path_from, "' to '", output_path_to, "'.",
+        call. = FALSE
+      )
+    }
+    file.remove(output_path_from)
+
+  } else if (output_file != output) {
+    # Same directory but a different file name: just rename the output.
+    renamed <- file.rename(from = output_path_from, to = output_path_to)
+    if (!isTRUE(renamed)) {
+      stop(
+        "Could not rename '", output_path_from, "' to '", output_path_to, "'.",
+        call. = FALSE
+      )
+    }
+  }
+
+  invisible(output_path_to)
+}
+```
+
+Render files.
+```{r}
+
+# Render one parameterized report per year for each geography.
+# tibble, purrr and glue functions are namespace-qualified because only
+# quarto and here are attached in the housekeeping chunk.
+for (geography in c("county", "place")) {
+
+  reports <-
+    tibble::tibble(
+      input = here::here("08_education", paste0("preschool_", geography, ".qmd")),
+      output_file = glue::glue("calculate-prek-{geography}-{years}.html"),
+      execute_params = purrr::map(years, function(year) list(year = year))
+    )
+
+  # Each row supplies input, output_file and execute_params for one render.
+  purrr::pwalk(reports, quarto_render_move)
+}
+```
diff --git a/10a_final-evaluation/evaluation_form_preschool_income_county.csv b/10a_final-evaluation/evaluation_form_preschool_income_county.csv
@@ -0,0 +1,9 @@
+,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
+,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
+User Input,Metric name - As written in final data file,"All Years (use "";"" no space)",Confidence intervals? (Yes or No),Quality variables? available (Yes or No),Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
+Example (leave this row alone),Transportation_index_price,2016;2018;2022,Yes,Yes,race-ethnicity,"All;Majority Non-White;Majority White, Non-Hispanic;Mixed Race and Ethnicity"
+User Input 1 ,share_in_preschool,2014;2016;2018;2021,Yes,Yes,income,"All;Less than $50,000;$50,000 or More"
+User Input 2,,,,,,
+User Input 3,,,,,,
+User Input 4,,,,,,
+User Input 5,,,,,,
diff --git a/10a_final-evaluation/evaluation_form_preschool_income_place.csv b/10a_final-evaluation/evaluation_form_preschool_income_place.csv
@@ -0,0 +1,9 @@
+,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
+,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
+User Input,Metric name - As written in final data file,"All Years (use "";"" no space)",Confidence intervals? (Yes or No),Quality variables? available (Yes or No),Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
+Example (leave this row alone),Transportation_index_price,2016;2018;2022,Yes,Yes,race-ethnicity,"All;Majority Non-White;Majority White, Non-Hispanic;Mixed Race and Ethnicity"
+User Input 1 ,share_in_preschool,2014;2016;2018;2021,Yes,Yes,income,"All;Less than $50,000;$50,000 or More"
+User Input 2,,,,,,
+User Input 3,,,,,,
+User Input 4,,,,,,
+User Input 5,,,,,,
diff --git a/10a_final-evaluation/evaluation_form_preschool_overall_county.csv b/10a_final-evaluation/evaluation_form_preschool_overall_county.csv
@@ -0,0 +1,9 @@
+,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
+,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
+User Input,Metric name - As written in final data file,"All Years (use "";"" no space)",Confidence intervals? (Yes or No),Quality variables? available (Yes or No),Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
+Example (leave this row alone),Transportation_index_price,2016;2018;2022,Yes,Yes,race-ethnicity,"All;Majority Non-White;Majority White, Non-Hispanic;Mixed Race and Ethnicity"
+User Input 1 ,share_in_preschool,2014;2016;2018;2021,Yes,Yes,,
+User Input 2,,,,,,
+User Input 3,,,,,,
+User Input 4,,,,,,
+User Input 5,,,,,,
diff --git a/10a_final-evaluation/evaluation_form_preschool_overall_place.csv b/10a_final-evaluation/evaluation_form_preschool_overall_place.csv
@@ -0,0 +1,9 @@
+,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
+,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
+User Input,Metric name - As written in final data file,"All Years (use "";"" no space)",Confidence intervals? (Yes or No),Quality variables? available (Yes or No),Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
+Example (leave this row alone),Transportation_index_price,2016;2018;2022,Yes,Yes,race-ethnicity,"All;Majority Non-White;Majority White, Non-Hispanic;Mixed Race and Ethnicity"
+User Input 1 ,share_in_preschool,2014;2016;2018;2021,Yes,Yes,,
+User Input 2,,,,,,
+User Input 3,,,,,,
+User Input 4,,,,,,
+User Input 5,,,,,,
diff --git a/10a_final-evaluation/evaluation_form_preschool_race_eth_county.csv b/10a_final-evaluation/evaluation_form_preschool_race_eth_county.csv
@@ -0,0 +1,9 @@
+,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
+,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
+User Input,Metric name - As written in final data file,"All Years (use "";"" no space)",Confidence intervals? (Yes or No),Quality variables? available (Yes or No),Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
+Example (leave this row alone),Transportation_index_price,2016;2018;2022,Yes,Yes,race-ethnicity,"All;Majority Non-White;Majority White, Non-Hispanic;Mixed Race and Ethnicity"
+User Input 1 ,share_in_preschool,2014;2016;2018;2021,Yes,Yes,race-ethnicity,"All;White, Non-Hispanic;Other Races and Ethnicities;Hispanic;Black, Non-Hispanic"
+User Input 2,,,,,,
+User Input 3,,,,,,
+User Input 4,,,,,,
+User Input 5,,,,,,
diff --git a/10a_final-evaluation/evaluation_form_preschool_race_eth_place.csv b/10a_final-evaluation/evaluation_form_preschool_race_eth_place.csv
@@ -0,0 +1,9 @@
+,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
+,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
+User Input,Metric name - As written in final data file,"All Years (use "";"" no space)",Confidence intervals? (Yes or No),Quality variables? available (Yes or No),Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
+Example (leave this row alone),Transportation_index_price,2016;2018;2022,Yes,Yes,race-ethnicity,"All;Majority Non-White;Majority White, Non-Hispanic;Mixed Race and Ethnicity"
+User Input 1 ,share_in_preschool,2014;2016;2018;2021,Yes,Yes,race-ethnicity,"All;White, Non-Hispanic;Other Races and Ethnicities;Hispanic;Black, Non-Hispanic"
+User Input 2,,,,,,
+User Input 3,,,,,,
+User Input 4,,,,,,
+User Input 5,,,,,,
diff --git a/functions/API/extract_ipums_aws.R b/functions/API/extract_ipums_aws.R
@@ -0,0 +1,132 @@
+  # Add library here for filepath
+  library(here)
+  library(ipumsr)
+  library(aws.s3)
+  library(tidyverse)
+
+  #' Load an IPUMS USA extract, reusing a cached copy in S3 when one exists
+  #'
+  #' Looks for the object `<s3_dir>/<extract_name>_<extract_date>.rds` in the
+  #' S3 bucket. If it exists it is read and returned. Otherwise a new extract
+  #' is defined, submitted and downloaded through the IPUMS API, the files are
+  #' renamed and read, the data are cleaned, the result is uploaded to S3
+  #' under that same key, and the cleaned data are returned.
+  #'
+  #' NOTE(review): `s3_dir` is not an argument. It is looked up in the
+  #' enclosing environment and must be defined before this function is called.
+  #'
+  #' @param extract_name Base name used for the renamed local files and for
+  #'   the S3 object.
+  #' @param extract_date Date label appended to the S3 object name.
+  #' @param extract_description Description passed to `define_extract_usa()`.
+  #' @param survey IPUMS sample identifier(s) passed as `samples`.
+  #' @param bucket S3 bucket used for BOTH the cache lookup and the upload.
+  #'   Defaults to the bucket that was previously hard-coded for the upload,
+  #'   so the lookup no longer depends on a separately defined `my_bucket`.
+  #' @return The cleaned microdata, either read from S3 or freshly built.
+  extract_ipums_aws <- function(extract_name, extract_date, extract_description, survey,
+                                bucket = "mobility-from-poverty-test") {
+
+    # Fail early with a clear message instead of "object 's3_dir' not found".
+    if (!exists("s3_dir")) {
+      stop(
+        "`s3_dir` must be defined before calling extract_ipums_aws().",
+        call. = FALSE
+      )
+    }
+
+    # Local download folder, target file names, and the S3 object key
+    folder_path <- here("data", "temp", "raw")
+    extract_gz_path <- file.path(folder_path, paste0(extract_name, "_umf.dat.gz"))
+    extract_xml_path <- file.path(folder_path, paste0(extract_name, "_umf.xml"))
+    s3_object <- paste0(s3_dir, "/", extract_name, "_", extract_date, ".rds")
+
+    # If the extract is already stored in S3, read it and return immediately
+    if (aws.s3::object_exists(s3_object, bucket = bucket)) {
+      return(s3read_using(FUN = readRDS, bucket = bucket, object = s3_object))
+    }
+
+    # Create the folder path if it doesn't exist
+    if (!dir.exists(folder_path)) {
+      dir.create(folder_path, recursive = TRUE)
+    }
+
+    # The extract is not in S3, so create it using the IPUMS API
+    usa_ext_umf <-
+      define_extract_usa(
+        description = extract_description,
+        samples = c(survey),
+        variables = c(
+          "ADJUST",
+          "STATEFIP",
+          "PUMA",
+          "GQ",
+          "HHINCOME",
+          "AGE",
+          "EMPSTAT",
+          "VACANCY",
+          "PERNUM",
+          "RACE",
+          "HISPAN",
+          "EDUCD",
+          "GRADEATT",
+          "SEX",
+          "DIFFCARE",
+          "DIFFSENS",
+          "DIFFMOB",
+          "DIFFPHYS",
+          "DIFFREM",
+          "CBPERNUM"
+        )
+      )
+
+    # Submit the extract and wait until it is ready
+    usa_ext_umf_submitted <- submit_extract(usa_ext_umf)
+    usa_ext_complete <- wait_for_extract(usa_ext_umf_submitted)
+
+    # Download the completed extract into the "raw" folder inside data/temp
+    download_extract(
+      usa_ext_complete,
+      download_dir = folder_path,
+      progress = TRUE
+    )
+
+    # Locate the freshly downloaded files. Files whose name contains "umf"
+    # were renamed by an earlier run and are skipped. Matching is done on the
+    # file name only, so the project path cannot affect the filters.
+    downloaded <- list.files(folder_path, full.names = TRUE)
+    downloaded <- downloaded[!str_detect(basename(downloaded), "umf")]
+    new_gz <- downloaded[str_detect(basename(downloaded), "\\.dat\\.gz$")]
+    new_xml <- downloaded[str_detect(basename(downloaded), "\\.xml$")]
+
+    if (length(new_gz) != 1 || length(new_xml) != 1) {
+      stop(
+        "Expected exactly one new .dat.gz and one new .xml file in '",
+        folder_path, "' but found ", length(new_gz), " and ",
+        length(new_xml), ". Remove stale downloads and try again.",
+        call. = FALSE
+      )
+    }
+
+    # Rename extract files, pairing each file with its target explicitly
+    renamed <- file.rename(
+      c(new_gz, new_xml),
+      c(extract_gz_path, extract_xml_path)
+    )
+    if (!all(renamed)) {
+      stop("Could not rename the downloaded IPUMS files.", call. = FALSE)
+    }
+
+    # Read extract file
+    # DDI is a codebook that is used by IPUMSR to format the micro data downloaded
+    ddi <- read_ipums_ddi(extract_xml_path)
+
+    micro_data <-
+      read_ipums_micro(
+        ddi,
+        data_file = extract_gz_path
+      )
+
+    # Lower variable names and get rid of unnecessary variables
+    acs_imported <- micro_data %>%
+      rename_with(tolower) %>%
+      select(-serial, -raced, -strata, -cluster, -hispand, -empstatd)
+
+    # Drop the raw copy before the next transformation
+    rm(micro_data)
+
+    # Convert labelled variables to factors, zap labels elsewhere, and
+    # reformat the State and PUMA variables as zero-padded strings
+    acs_imported <- acs_imported %>%
+      mutate(
+        across(
+          c(sample, gq, race, hispan, sex, diffcare, diffsens, diffmob, diffphys, diffrem),
+          ~as_factor(.x)
+        ),
+        across(c(statefip, puma, hhincome, vacancy, age, empstat), ~zap_labels(.x)),
+        statefip = sprintf("%0.2d", as.numeric(statefip)),
+        puma = sprintf("%0.5d", as.numeric(puma)),
+        unique_person_id = paste0(sample, cbserial, cbpernum)
+      )
+
+    # Write the cleaned data to a temporary file and upload it to S3
+    tmp <- tempfile()
+    on.exit(unlink(tmp), add = TRUE)
+    saveRDS(acs_imported, file = tmp, compress = TRUE)
+
+    # put object with an upload progress bar
+    put_object(
+      tmp,
+      object = s3_object,
+      bucket = bucket,
+      show_progress = TRUE,
+      multipart = TRUE
+    )
+
+    # Return the ACS data set
+    acs_imported
+  }
+