Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update college metric. #450

Open
wants to merge 1 commit into
base: version2025
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6,708 changes: 6,708 additions & 0 deletions 08_education/calculate-college-county-2014.html

Large diffs are not rendered by default.

6,784 changes: 6,784 additions & 0 deletions 08_education/calculate-college-county-2016.html

Large diffs are not rendered by default.

6,782 changes: 6,782 additions & 0 deletions 08_education/calculate-college-county-2018.html

Large diffs are not rendered by default.

1,625 changes: 920 additions & 705 deletions 08_education/college_metrics_county.html

Large diffs are not rendered by default.

254 changes: 216 additions & 38 deletions 08_education/college_metrics_county.qmd

Large diffs are not rendered by default.

1,617 changes: 898 additions & 719 deletions 08_education/college_metrics_place.html

Large diffs are not rendered by default.

233 changes: 197 additions & 36 deletions 08_education/college_metrics_place.qmd

Large diffs are not rendered by default.

10,256 changes: 8,271 additions & 1,985 deletions 08_education/data/final/metrics_college_county_all_longitudinal.csv

Large diffs are not rendered by default.

27,150 changes: 23,004 additions & 4,146 deletions 08_education/data/final/metrics_college_county_disability_longitudinal.csv

Large diffs are not rendered by default.

27,876 changes: 23,367 additions & 4,509 deletions 08_education/data/final/metrics_college_county_gender_longitudinal.csv

Large diffs are not rendered by default.

40,678 changes: 36,054 additions & 4,624 deletions 08_education/data/final/metrics_college_county_race-ethnicity_longitudinal.csv

Large diffs are not rendered by default.

2,134 changes: 1,553 additions & 581 deletions 08_education/data/final/metrics_college_place_all_longitudinal.csv

Large diffs are not rendered by default.

5,316 changes: 4,116 additions & 1,200 deletions 08_education/data/final/metrics_college_place_disability_longitudinal.csv

Large diffs are not rendered by default.

6,090 changes: 4,503 additions & 1,587 deletions 08_education/data/final/metrics_college_place_gender_longitudinal.csv

Large diffs are not rendered by default.

8,030 changes: 6,445 additions & 1,585 deletions 08_education/data/final/metrics_college_place_race-ethnicity_longitudinal.csv

Large diffs are not rendered by default.

88 changes: 88 additions & 0 deletions 08_education/run_college.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
---
title: "Run college"
date: today
format: html
execute:
warning: false
editor_options:
chunk_output_type: console
---

## Housekeeping

Import necessary libraries.

```{r}
library(quarto)
library(here)

```


## Render QMD Years

Select which years of data you want to render the college metrics file for.

```{r}

# Years to render; each value is passed to the report as the `year` parameter.
years <- c("2014", "2016", "2018")
```

Define a wrapper around the Quarto render function so that the rendered output is written to the correct folder.

```{r}
#' Render a Quarto document and place the output where requested
#'
#' Renders `input` next to the source file with `quarto::quarto_render()`,
#' then moves and/or renames the rendered file.
#'
#' @param input Path to the `.qmd` file to render.
#' @param output_file File name for the rendered output. Defaults to the
#'   `output-file` reported by `quarto::quarto_inspect()`.
#' @param output_dir Directory for the rendered output. Defaults to the
#'   directory containing `input`.
#' @param ... Further arguments forwarded to `quarto::quarto_render()`
#'   (for example `execute_params`).
quarto_render_move <- function(
  input,
  output_file = NULL,
  output_dir = NULL,
  ...
) {

  # Get all the input / output file names and paths
  x <- quarto::quarto_inspect(input)
  # A document may declare several formats; `[[` needs exactly one name, so
  # use the first declared format.
  output_format <- names(x$formats)[[1]]
  output <- x$formats[[output_format]]$pandoc$`output-file`
  if (is.null(output_file)) {
    output_file <- output
  }
  input_dir <- dirname(input)
  if (is.null(output_dir)) {
    output_dir <- input_dir
  }
  output_path_from <- file.path(input_dir, output)
  output_path_to <- file.path(output_dir, output_file)

  # Render qmd file to input_dir
  quarto::quarto_render(input = input, ...)

  if (input_dir != output_dir) {
    # The output belongs in a different directory: copy it there and delete
    # the original file.

    # Create the folder (and any missing parents) if it doesn't yet exist
    if (!dir.exists(output_dir)) {
      dir.create(output_dir, recursive = TRUE)
    }

    copied <- file.copy(
      from = output_path_from,
      to = output_path_to,
      overwrite = TRUE
    )

    # Only delete the original once the copy is known to have succeeded;
    # otherwise the rendered output would be lost.
    if (!isTRUE(copied)) {
      stop(
        "Could not copy ", output_path_from, " to ", output_path_to,
        call. = FALSE
      )
    }
    file.remove(output_path_from)

  } else if (output_file != output) {
    # Same directory but a different file name: just rename it
    file.rename(from = output_path_from, to = output_path_to)
  }
}
```

Render files.
```{r}

# Build one row per year: the same input document, a year-specific output
# file name, and the `year` parameter passed to the document.
# Functions are namespaced because this document only attaches `quarto` and
# `here`.
# NOTE(review): the input is the *place* qmd but the output files are named
# "county" -- confirm which geography is intended.
reports <-
  tibble::tibble(
    input = here::here("08_education", "college_metrics_place.qmd"),
    output_file = glue::glue("calculate-college-county-{years}.html"),
    execute_params = purrr::map(years, function(yr) list(year = yr))
  )

# Render each row; the columns are matched to the arguments of
# quarto_render_move() by name (execute_params is forwarded through `...`).
purrr::pwalk(reports, quarto_render_move)
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
User Input,Metric name - As written in final data file�,"All Years (use "";"" no space)�",Confidence intervals? (Yes or No)�,Quality variables? available (Yes or No)�,Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
Example (leave this row alone),Transportation_index_price�,2016;2018;2022�,Yes,Yes,race-ethnicity,"All;Black, Non-Hispanic;Hispanic;Other Races and Ethnicities;White, Non-Hispanic"
User Input 1 ,share_hs_degree,2014;2016;2018;2021,Yes,Yes,disability,All;With Disability;Without Disability
User Input 2,,,,,,
User Input 3,,,,,,
User Input 4,,,,,,
User Input 5,,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
User Input,Metric name - As written in final data file�,"All Years (use "";"" no space)�",Confidence intervals? (Yes or No)�,Quality variables? available (Yes or No)�,Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
Example (leave this row alone),Transportation_index_price�,2016;2018;2022�,Yes,Yes,race-ethnicity,"All;Black, Non-Hispanic;Hispanic;Other Races and Ethnicities;White, Non-Hispanic"
User Input 1 ,share_hs_degree,2014;2016;2018;2021,Yes,Yes,disability,All;With Disability;Without Disability
User Input 2,,,,,,
User Input 3,,,,,,
User Input 4,,,,,,
User Input 5,,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
User Input,Metric name - As written in final data file�,"All Years (use "";"" no space)�",Confidence intervals? (Yes or No)�,Quality variables? available (Yes or No)�,Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
Example (leave this row alone),Transportation_index_price�,2016;2018;2022�,Yes,Yes,race-ethnicity,"All;Black, Non-Hispanic;Hispanic;Other Races and Ethnicities;White, Non-Hispanic"
User Input 1 ,share_hs_degree,2014;2016;2018;2021,Yes,Yes,gender,All;Male;Female
User Input 2,,,,,,
User Input 3,,,,,,
User Input 4,,,,,,
User Input 5,,,,,,
9 changes: 9 additions & 0 deletions 10a_final-evaluation/evaluation_form_college_gender_place.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
User Input,Metric name - As written in final data file�,"All Years (use "";"" no space)�",Confidence intervals? (Yes or No)�,Quality variables? available (Yes or No)�,Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
Example (leave this row alone),Transportation_index_price�,2016;2018;2022�,Yes,Yes,race-ethnicity,"All;Black, Non-Hispanic;Hispanic;Other Races and Ethnicities;White, Non-Hispanic"
User Input 1 ,share_hs_degree,2014;2016;2018;2021,Yes,Yes,gender,All;Male;Female
User Input 2,,,,,,
User Input 3,,,,,,
User Input 4,,,,,,
User Input 5,,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
User Input,Metric name - As written in final data file�,"All Years (use "";"" no space)�",Confidence intervals? (Yes or No)�,Quality variables? available (Yes or No)�,Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
Example (leave this row alone),Transportation_index_price�,2016;2018;2022�,Yes,Yes,race-ethnicity,"All;Majority Non-White;Majority White, Non-Hispanic;Mixed Race and Ethnicity"
User Input 1 ,share_hs_degree,2014;2016;2018;2021,Yes,Yes,,
User Input 2,,,,,,
User Input 3,,,,,,
User Input 4,,,,,,
User Input 5,,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
User Input,Metric name - As written in final data file�,"All Years (use "";"" no space)�",Confidence intervals? (Yes or No)�,Quality variables? available (Yes or No)�,Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
Example (leave this row alone),Transportation_index_price�,2016;2018;2022�,Yes,Yes,race-ethnicity,"All;Majority Non-White;Majority White, Non-Hispanic;Mixed Race and Ethnicity"
User Input 1 ,share_hs_degree,2014;2016;2018;2021,Yes,Yes,,
User Input 2,,,,,,
User Input 3,,,,,,
User Input 4,,,,,,
User Input 5,,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
User Input,Metric name - As written in final data file�,"All Years (use "";"" no space)�",Confidence intervals? (Yes or No)�,Quality variables? available (Yes or No)�,Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
Example (leave this row alone),Transportation_index_price�,2016;2018;2022�,Yes,Yes,race-ethnicity,"All;Black, Non-Hispanic;Hispanic;Other Races and Ethnicities;White, Non-Hispanic"
User Input 1 ,share_hs_degree,2014;2016;2018;2021,Yes,Yes,race-ethnicity,"All;Black, Non-Hispanic;Hispanic;Other Races and Ethnicities;White, Non-Hispanic"
User Input 2,,,,,,
User Input 3,,,,,,
User Input 4,,,,,,
User Input 5,,,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
,This form to be filled in for the data in the subgroup files. If the metric has multiple variables please include input for each variable in the file.,,,,,
,"After completing this file, save it to the final data folder for the metric it relates to.",,,,,
User Input,Metric name - As written in final data file�,"All Years (use "";"" no space)�",Confidence intervals? (Yes or No)�,Quality variables? available (Yes or No)�,Subgroup Type (leave blank if none),"Subgroup Values (include ""All"" and use "";"" no space)"
Example (leave this row alone),Transportation_index_price�,2016;2018;2022�,Yes,Yes,race-ethnicity,"All;Black, Non-Hispanic;Hispanic;Other Races and Ethnicities;White, Non-Hispanic"
User Input 1 ,share_hs_degree,2014;2016;2018;2021,Yes,Yes,race-ethnicity,"All;Black, Non-Hispanic;Hispanic;Other Races and Ethnicities;White, Non-Hispanic"
User Input 2,,,,,,
User Input 3,,,,,,
User Input 4,,,,,,
User Input 5,,,,,,
132 changes: 132 additions & 0 deletions functions/API/extract_ipums_aws.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Add library here for filepath
library(here)
library(ipumsr)
library(aws.s3)
library(tidyverse)

#' Load an IPUMS USA extract, using an S3 object as a cache
#'
#' If `<s3_dir>/<extract_name>_<extract_date>.rds` already exists in the
#' bucket it is read and returned. Otherwise the extract is defined and
#' submitted through the IPUMS API, downloaded into `data/temp/raw`, renamed,
#' cleaned, uploaded to S3 under that key, and returned.
#'
#' @param extract_name Base name used for the local files and the S3 object.
#' @param extract_date Date string appended to the S3 object name.
#' @param extract_description Description passed to `define_extract_usa()`.
#' @param survey IPUMS sample identifier(s) passed as `samples`.
#' @param s3_dir S3 key prefix. When `NULL` (default) the variable `s3_dir`
#'   visible where this function was defined is used, as before.
#' @param my_bucket S3 bucket name. When `NULL` (default) the variable
#'   `my_bucket` visible where this function was defined is used, falling
#'   back to `"mobility-from-poverty-test"`.
#' @return The cleaned extract data (lower-case column names).
extract_ipums_aws <- function(extract_name, extract_date, extract_description,
                              survey, s3_dir = NULL, my_bucket = NULL) {

  # Resolve the S3 settings once, up front, so the cache check, the read and
  # the upload all use the same bucket and key. Previously `my_bucket` was
  # only assigned after it had already been used.
  enclosing_env <- parent.env(environment())
  if (is.null(s3_dir)) {
    s3_dir <- get0("s3_dir", envir = enclosing_env, ifnotfound = NULL)
  }
  if (is.null(s3_dir)) {
    stop(
      "`s3_dir` must be supplied or defined before calling ",
      "extract_ipums_aws().",
      call. = FALSE
    )
  }
  if (is.null(my_bucket)) {
    my_bucket <- get0(
      "my_bucket",
      envir = enclosing_env,
      ifnotfound = "mobility-from-poverty-test"
    )
  }
  s3_object <- paste0(s3_dir, "/", extract_name, "_", extract_date, ".rds")

  # Cache hit: read the stored data straight from S3 and return it
  if (aws.s3::object_exists(s3_object, bucket = my_bucket)) {
    acs_imported <- s3read_using(
      FUN = readRDS,
      bucket = my_bucket,
      object = s3_object
    )
    return(acs_imported)
  }

  # Set folder path, .gz, and .xml variables
  folder_path <- here("data", "temp", "raw")
  gz_path <- file.path(folder_path, paste0(extract_name, "_umf.dat.gz"))
  xml_path <- file.path(folder_path, paste0(extract_name, "_umf.xml"))

  # Create the folder path if it doesn't exist
  if (!dir.exists(folder_path)) {
    dir.create(folder_path, recursive = TRUE)
  }

  # Define the extract using the IPUMS API
  usa_ext_umf <-
    define_extract_usa(
      description = extract_description,
      samples = c(survey),
      variables = c(
        "ADJUST",
        "STATEFIP",
        "PUMA",
        "GQ",
        "HHINCOME",
        "AGE",
        "EMPSTAT",
        "VACANCY",
        "PERNUM",
        "RACE",
        "HISPAN",
        "EDUCD",
        "GRADEATT",
        "SEX",
        "DIFFCARE",
        "DIFFSENS",
        "DIFFMOB",
        "DIFFPHYS",
        "DIFFREM",
        "CBPERNUM"
      )
    )

  # Submit the extract and wait until it is ready to download
  usa_ext_umf_submitted <- submit_extract(usa_ext_umf)
  usa_ext_complete <- wait_for_extract(usa_ext_umf_submitted)

  # Download into the "raw" data folder inside of the universal data/temp
  download_extract(
    usa_ext_complete,
    download_dir = folder_path,
    progress = TRUE
  )

  # Rename the downloaded files. Files renamed by earlier runs carry "umf" in
  # their name and are skipped. The data and codebook files are matched by
  # extension rather than by the order list.files() happens to return.
  downloaded <- list.files(folder_path, full.names = TRUE)
  downloaded <- downloaded[!str_detect(basename(downloaded), "umf")]
  gz_downloaded <- downloaded[str_detect(downloaded, "\\.dat\\.gz$")]
  xml_downloaded <- downloaded[str_detect(downloaded, "\\.xml$")]

  if (length(gz_downloaded) != 1 || length(xml_downloaded) != 1) {
    stop(
      "Expected exactly one new .dat.gz and one new .xml file in ",
      folder_path, "; found ", length(gz_downloaded), " and ",
      length(xml_downloaded), ".",
      call. = FALSE
    )
  }

  file.rename(
    c(gz_downloaded, xml_downloaded),
    c(gz_path, xml_path)
  )

  # Read extract file. The DDI is a codebook that ipumsr uses to format the
  # downloaded micro data.
  ddi <- read_ipums_ddi(xml_path)

  micro_data <-
    read_ipums_micro(
      ddi,
      data_file = gz_path
    )

  # Lower variable names and get rid of unnecessary variables
  acs_imported <- micro_data %>%
    rename_with(tolower) %>%
    select(-serial, -raced, -strata, -cluster, -hispand, -empstatd)

  rm(micro_data)

  # Convert labelled columns to factors, zap labels on the remaining coded
  # columns, and reformat the State and PUMA variables as zero-padded strings
  acs_imported <- acs_imported %>%
    mutate(
      across(
        c(
          sample, gq, race, hispan, sex,
          diffcare, diffsens, diffmob, diffphys, diffrem
        ),
        ~as_factor(.x)
      ),
      across(
        c(statefip, puma, hhincome, vacancy, age, empstat),
        ~zap_labels(.x)
      ),
      statefip = sprintf("%0.2d", as.numeric(statefip)),
      puma = sprintf("%0.5d", as.numeric(puma)),
      unique_person_id = paste0(sample, cbserial, cbpernum)
    )

  # Write the data to a temporary file that is removed when the function exits
  tmp <- tempfile()
  on.exit(unlink(tmp), add = TRUE)
  saveRDS(acs_imported, file = tmp, compress = TRUE)

  # Put the object in S3 with an upload progress bar
  put_object(
    tmp,
    object = s3_object,
    bucket = my_bucket,
    show_progress = TRUE,
    multipart = TRUE
  )

  # Return the ACS data set
  acs_imported
}

Loading