updated A10 writing

NEFSC · Dec 12, 2024 · 62577d9 · 62577d9
1 parent d85fe3e
commit 62577d9
Show file tree

Hide file tree

Showing 2 changed files with 305 additions and 85 deletions.
diff --git a/Amendment10/writing/Amendment10.Rmd b/Amendment10/writing/Amendment10.Rmd
@@ -21,9 +21,8 @@ https://github.com/NEFSC/READ-SSB-Lee-regulatory-analyses
 
 Compiling this document requires a few pieces of data-extraction code to be run. 
 
-1.  You must be able to see this folder ``nefscfile\BLAST\READ-SSB-Lee-MRIP-BLAST\data_older\raw``. It contains the raw MRIP data that has been imported into Rds format. These were constructed with ``https://github.com/NEFSC/READ-SSB-MRIP-BLAST/R_code/data_extracting_processing/extraction/pull_in_MRIP.R``.  
-
-
+1. You have to run the Amendment_10_data_prep.Rmd file at least 1 time.
+2. you have to set the in-data vintage.
 
 
 
@@ -63,6 +62,8 @@ options(tinytex.verbose = TRUE)
 
 # RFA data
 RFA_filepath<-file.path("//nefscdata","RFA_EO12866_Guidelines" ,"Ownership Data", "current data and metadata","affiliates_2024_06_01.Rdata")
+BLAST_raw_filepath<-file.path("//nefscfile","BLAST" ,"READ-SSB-Lee-MRIP-BLAST", "data_folder","raw")
+
 
 
 #############################################################################
@@ -72,8 +73,9 @@ deflate_by <- "year"
 base_year = 2022 # base year for GDP deflator - Maximum = 2020, Minimum = 1947  - max(GDPDEF_quarterly$Year)
 
 
-  
-###################################################################################################
+vintage_string<-Sys.Date()
+vintage_string<-gsub("-","_",vintage_string)
+vintage_string<-"2024_10_30"
 ```
 
  <!----  The Get_Deflators chunk loads the GDPDEF series from the internet so you can deflate.  --->
@@ -122,82 +124,66 @@ base_year_index <- as.numeric(base_year_index)
 GDPDEF_annual <- GDPDEF_annual %>%
   mutate(GDPDEF_multiplic_factor = GDPDEF/(base_year_index)) %>%
   select(c(Year, GDPDEF_multiplic_factor)) %>%
-  dplyr::filter(Year>=2004 & Year<=2024)
+  dplyr::filter(Year>=2000 & Year<=2024)
 
 ```
 
- <!----  Get Census data -- 2022 ACS income and percent non-white  --->
-```{r get_census, eval=TRUE}
-# Pull in and tidy up some Census data on household income
-income <- getCensus(
-  name = "acs/acs5/subject",
-  vintage = 2022,
-  vars = c("S1901_C01_001E",  "S1901_C01_010E","S1901_C01_011E","S1901_C01_012E","S1901_C01_013E","S0101_C01_001E", "S0101_C02_028E","S0601_C01_001E","S0601_C01_022E"),
-  region = "zip code tabulation area:*", 
-  show_call=FALSE)
-
-income<-income %>%
-  rename ( households=S1901_C01_001E,
-           household_inc_150_200pct =S1901_C01_010E,
-           household_inc_200pct=S1901_C01_011E,
-           household_median_inc=S1901_C01_012E, 
-           household_mean_inc=S1901_C01_013E,
-           total_population=S0101_C01_001E,
-           population_pct_over60=S0101_C02_028E,
-           total_population_s0601=S0601_C01_001E,
-           pct_white_alone =S0601_C01_022E
-           ) %>%
-  mutate(obscured_median=case_when(household_median_inc<0 ~ 1, .default=0),
-         obscured_mean=case_when(household_mean_inc<0 ~ 1, .default=0)
-         )
-
-
-# Read in the state-zcta correspondence file.
-zcta_state<-read_delim("https://www2.census.gov/geo/docs/maps-data/data/rel2020/zcta520/tab20_zcta520_county20_natl.txt", delim="|", guess_max=2000)
-
-# Contract down to the fraction in each area.
-zcta_state <- zcta_state %>%
-  dplyr::filter(is.na(OID_ZCTA5_20)==FALSE) %>%
-  group_by(OID_ZCTA5_20) %>%
-  mutate(total_area=sum(AREALAND_PART)) %>%
-  mutate(fraction=AREALAND_PART/total_area) %>%
-  select(c(GEOID_ZCTA5_20, OID_ZCTA5_20,OID_COUNTY_20, GEOID_COUNTY_20, fraction))
-
-
-
-# Pull in and tidy up some Census data on household income at the state level
-county_income <- getCensus(
-  name = "acs/acs5/subject",
-  vintage = 2022,
-  vars = c("S1901_C01_012E","S1901_C01_013E"),
-  region = "county:*", 
-  show_call=FALSE)
-
-county_income<-county_income %>%
-  rename (county_household_median_inc=S1901_C01_012E, 
-           county_household_mean_inc=S1901_C01_013E
-  ) %>%
-  mutate(st_co=paste0(state,county))
-
-
-# use county income to create an in imputed income based on county income
-income_fill<-zcta_state %>%
-  left_join(county_income, by=join_by(GEOID_COUNTY_20==st_co), relationship="many-to-one") %>%
-  mutate(wmedian=fraction*county_household_median_inc,
-         wmean=fraction*county_household_mean_inc) %>%
-  group_by(GEOID_ZCTA5_20, OID_ZCTA5_20) %>%
-  summarise(imputed_household_median_income=sum(wmedian),
-           imputed_household_mean_income=sum(wmean)) %>%
+
+```{r Readin_MRIP, include=FALSE, eval=TRUE}
+
+target_totals_stco<-readRDS(file=here("data_folder", "main", paste0("targeted_stco_",vintage_string,".Rds")))
+target_totals_zip<-readRDS(file=here("data_folder", "main", paste0("targeted_zip_",vintage_string,".Rds")))
+
+
+```
+
+
+
+```{r Readin_Census, include=FALSE, eval=TRUE}
+county_income<-readRDS(file=here("data_folder", "main", paste0("county_income",vintage_string,".Rds")))
+zcta_income<-readRDS(file=here("data_folder", "main", paste0("income_",vintage_string,".Rds")))
+zcta_state<-readRDS(file=here("data_folder", "main", paste0("zcta_state",vintage_string,".Rds")))
+zcta<-readRDS(file=here("data_folder", "main", paste0("zcta",vintage_string,".Rds")))
+
+
+incomes_test<-zcta_income %>%
+  group_by(zip_code_tabulation_area) %>%
+  dplyr::mutate(count=n()) %>%
   ungroup()
 
-income<-income %>%
-  left_join(income_fill, by=join_by(zip_code_tabulation_area==GEOID_ZCTA5_20), relationship="one-to-one")%>%
-  mutate(household_median_inc=case_when(obscured_median==1 ~ imputed_household_median_income, .default=household_median_inc),
-         household_mean_inc=case_when(obscured_mean<0 ~ imputed_household_mean_income, .default=household_mean_inc)
-         ) %>%
-  select(-c(imputed_household_mean_income,imputed_household_median_income))
+```
+
+```{r join_mrip_to_income, include=FALSE, eval=TRUE}
+
+target_totals_zip<-target_totals_zip %>%
+  left_join(zcta_income, by=join_by(zip==zip_code_tabulation_area), relationship="many-to-one")%>%
+  arrange(zip)
+
+# Approximately 5% of the trips are not matched to a zcta. That's not too bad 
+tst<-target_totals_zip %>%
+  mutate(unmatched=is.na(households))%>%
+  group_by(unmatched)%>%
+    summarise(dtrip=sum(dtrip))
+
+
+income_stats<-target_totals_zip %>%
+  dplyr::filter(is.na(household_mean_inc)==FALSE)  %>%
+  dplyr::filter(is.na(pct_white_alone)==FALSE)  %>%
+  dplyr::filter(is.na(dtrip)==FALSE)  %>%
+  ungroup() %>%
+  summarise(dtrip=sum(dtrip), 
+            whousehold_mean=sum(dtrip*household_mean_inc,na.rm=TRUE),
+            wpct_white_alone=sum(dtrip*pct_white_alone, na.rm=TRUE)
+            ) %>%
+  mutate(household_mean=whousehold_mean/dtrip,
+         pct_white_alone=wpct_white_alone/dtrip)
+
 
-summary(income)
 ```
 
 
+
+
+
+
+