Skip to content

Commit

Permalink
updated A10 writing
Browse files Browse the repository at this point in the history
  • Loading branch information
mle2718 committed Dec 12, 2024
1 parent d85fe3e commit 62577d9
Show file tree
Hide file tree
Showing 2 changed files with 305 additions and 85 deletions.
136 changes: 61 additions & 75 deletions Amendment10/writing/Amendment10.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ https://github.com/NEFSC/READ-SSB-Lee-regulatory-analyses

Compiling this document requires a few pieces of data-extraction code to be run.

1. You must be able to see this folder ``nefscfile\BLAST\READ-SSB-Lee-MRIP-BLAST\data_older\raw``. It contains the raw MRIP data that has been imported into Rds format. These were constructed with ``https://github.com/NEFSC/READ-SSB-MRIP-BLAST/R_code/data_extracting_processing/extraction/pull_in_MRIP.R``.


1. You have to run the Amendment_10_data_prep.Rmd file at least 1 time.
2. you have to set the in-data vintage.



Expand Down Expand Up @@ -63,6 +62,8 @@ options(tinytex.verbose = TRUE)
# RFA data
RFA_filepath<-file.path("//nefscdata","RFA_EO12866_Guidelines" ,"Ownership Data", "current data and metadata","affiliates_2024_06_01.Rdata")
BLAST_raw_filepath<-file.path("//nefscfile","BLAST" ,"READ-SSB-Lee-MRIP-BLAST", "data_folder","raw")
#############################################################################
Expand All @@ -72,8 +73,9 @@ deflate_by <- "year"
base_year = 2022 # base year for GDP deflator - Maximum = 2020, Minimum = 1947 - max(GDPDEF_quarterly$Year)
###################################################################################################
vintage_string<-Sys.Date()
vintage_string<-gsub("-","_",vintage_string)
vintage_string<-"2024_10_30"
```

<!---- The Get_Deflators chunk loads the GDPDEF series from the internet so you can deflate. --->
Expand Down Expand Up @@ -122,82 +124,66 @@ base_year_index <- as.numeric(base_year_index)
GDPDEF_annual <- GDPDEF_annual %>%
mutate(GDPDEF_multiplic_factor = GDPDEF/(base_year_index)) %>%
select(c(Year, GDPDEF_multiplic_factor)) %>%
dplyr::filter(Year>=2004 & Year<=2024)
dplyr::filter(Year>=2000 & Year<=2024)
```

<!---- Get Census data -- 2022 ACS income and percent non-white --->
```{r get_census, eval=TRUE}
# Pull in and tidy up some Census data on household income
income <- getCensus(
name = "acs/acs5/subject",
vintage = 2022,
vars = c("S1901_C01_001E", "S1901_C01_010E","S1901_C01_011E","S1901_C01_012E","S1901_C01_013E","S0101_C01_001E", "S0101_C02_028E","S0601_C01_001E","S0601_C01_022E"),
region = "zip code tabulation area:*",
show_call=FALSE)
income<-income %>%
rename ( households=S1901_C01_001E,
household_inc_150_200pct =S1901_C01_010E,
household_inc_200pct=S1901_C01_011E,
household_median_inc=S1901_C01_012E,
household_mean_inc=S1901_C01_013E,
total_population=S0101_C01_001E,
population_pct_over60=S0101_C02_028E,
total_population_s0601=S0601_C01_001E,
pct_white_alone =S0601_C01_022E
) %>%
mutate(obscured_median=case_when(household_median_inc<0 ~ 1, .default=0),
obscured_mean=case_when(household_mean_inc<0 ~ 1, .default=0)
)
# Read in the state-zcta correspondence file.
zcta_state<-read_delim("https://www2.census.gov/geo/docs/maps-data/data/rel2020/zcta520/tab20_zcta520_county20_natl.txt", delim="|", guess_max=2000)
# Contract down to the fraction in each area.
zcta_state <- zcta_state %>%
dplyr::filter(is.na(OID_ZCTA5_20)==FALSE) %>%
group_by(OID_ZCTA5_20) %>%
mutate(total_area=sum(AREALAND_PART)) %>%
mutate(fraction=AREALAND_PART/total_area) %>%
select(c(GEOID_ZCTA5_20, OID_ZCTA5_20,OID_COUNTY_20, GEOID_COUNTY_20, fraction))
# Pull in and tidy up some Census data on household income at the state level
county_income <- getCensus(
name = "acs/acs5/subject",
vintage = 2022,
vars = c("S1901_C01_012E","S1901_C01_013E"),
region = "county:*",
show_call=FALSE)
county_income<-county_income %>%
rename (county_household_median_inc=S1901_C01_012E,
county_household_mean_inc=S1901_C01_013E
) %>%
mutate(st_co=paste0(state,county))
# use county income to create an in imputed income based on county income
income_fill<-zcta_state %>%
left_join(county_income, by=join_by(GEOID_COUNTY_20==st_co), relationship="many-to-one") %>%
mutate(wmedian=fraction*county_household_median_inc,
wmean=fraction*county_household_mean_inc) %>%
group_by(GEOID_ZCTA5_20, OID_ZCTA5_20) %>%
summarise(imputed_household_median_income=sum(wmedian),
imputed_household_mean_income=sum(wmean)) %>%

```{r Readin_MRIP, include=FALSE, eval=TRUE}
target_totals_stco<-readRDS(file=here("data_folder", "main", paste0("targeted_stco_",vintage_string,".Rds")))
target_totals_zip<-readRDS(file=here("data_folder", "main", paste0("targeted_zip_",vintage_string,".Rds")))
```



```{r Readin_Census, include=FALSE, eval=TRUE}
county_income<-readRDS(file=here("data_folder", "main", paste0("county_income",vintage_string,".Rds")))
zcta_income<-readRDS(file=here("data_folder", "main", paste0("income_",vintage_string,".Rds")))
zcta_state<-readRDS(file=here("data_folder", "main", paste0("zcta_state",vintage_string,".Rds")))
zcta<-readRDS(file=here("data_folder", "main", paste0("zcta",vintage_string,".Rds")))
incomes_test<-zcta_income %>%
group_by(zip_code_tabulation_area) %>%
dplyr::mutate(count=n()) %>%
ungroup()
income<-income %>%
left_join(income_fill, by=join_by(zip_code_tabulation_area==GEOID_ZCTA5_20), relationship="one-to-one")%>%
mutate(household_median_inc=case_when(obscured_median==1 ~ imputed_household_median_income, .default=household_median_inc),
household_mean_inc=case_when(obscured_mean<0 ~ imputed_household_mean_income, .default=household_mean_inc)
) %>%
select(-c(imputed_household_mean_income,imputed_household_median_income))
```

```{r join_mrip_to_income, include=FALSE, eval=TRUE}
target_totals_zip<-target_totals_zip %>%
left_join(zcta_income, by=join_by(zip==zip_code_tabulation_area), relationship="many-to-one")%>%
arrange(zip)
# Approximately 5% of the trips are not matched to a zcta. That's not too bad
tst<-target_totals_zip %>%
mutate(unmatched=is.na(households))%>%
group_by(unmatched)%>%
summarise(dtrip=sum(dtrip))
income_stats<-target_totals_zip %>%
dplyr::filter(is.na(household_mean_inc)==FALSE) %>%
dplyr::filter(is.na(pct_white_alone)==FALSE) %>%
dplyr::filter(is.na(dtrip)==FALSE) %>%
ungroup() %>%
summarise(dtrip=sum(dtrip),
whousehold_mean=sum(dtrip*household_mean_inc,na.rm=TRUE),
wpct_white_alone=sum(dtrip*pct_white_alone, na.rm=TRUE)
) %>%
mutate(household_mean=whousehold_mean/dtrip,
pct_white_alone=wpct_white_alone/dtrip)
summary(income)
```







Loading

0 comments on commit 62577d9

Please sign in to comment.