-
Notifications
You must be signed in to change notification settings - Fork 0
/
acfrs_gen_purpose_2020.R
138 lines (102 loc) · 5.58 KB
/
acfrs_gen_purpose_2020.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
options(scipen = 999)
library(tidyverse)
library(dplyr)
library(DT)
library(janitor)
source("census.R")
# Entity names in the original Acfrs database are not obvious, i.e.,
# They do not always contain a certain word to indicate its government type (i.e., city, town, county, etc).
# *ACFRs data has government_id (which is called census_id in the ACFRs portal).
# This is government unit identifier, and is not geo_id that are often used in many census datasets.
#
# Two points above constitute challenges when we need to:
# 1) adding population field in acfrs data, and
# 2) sort out what type of entities are collected.
#
# * Two main ways to solve the problems:
# - Use a "middle file" to link geo_id and government_ID
# - Joining by names and states
##### ACFRs General Purpose 2020
# step 1: get all general purpose entities in acfrs, most contains governmentID
acfrs_general_purpose_20 <- readRDS("data/data_from_dbsite_2020.RDS") %>%
filter(category == "General Purpose") %>%
select(-c(5:11, category, has_unconfirmed, component_unit_of_id)) %>%
rename(government_id = census_id,
state.abb = state) %>% # census_id in Acfrs database is actually government_id
# some government_id in ACFRs has 13 characters-> need to add 0
mutate(government_id = ifelse(str_length(government_id) < 14, paste0("0", government_id), government_id)) %>%
# step 2: join with the middle file to get geo_id into acfrs data
left_join(governmentID_geoID, by = c("state.abb", "government_id")) %>%
# cleaning to match with names in census
mutate(name = str_to_lower(name),
name = str_remove_all(name, "(\\.)|(\\')"),
# In LA, ACFRs of some parish titled "parish police jury" -> Geoff: these are counties ACFRs
name = str_remove_all(name, "police jury"),
name = str_remove_all(name, " fiscal court"),
name = str_trim(name)) %>%
mutate(name = case_when(name == "yakutat borough" & state.abb == "AK" ~ "yakutat city and borough",
name == "dona ana county" & state.abb == "NM" ~ "doña ana county",
name == "st marys county" & state.abb == "MD" ~ "st mary's county",
name == "athens-clarke county" & state.abb == "GA" ~ "st mary's county",
name == "greeneville-greene county" & state.abb == "TN" ~ "greene county",
name == "sevierville-sevier county" & state.abb == "TN" ~ "sevier county",
name == "lynchburg_moore county" & state.abb == "TN" ~ "moore county",
name == "hartsville-trousdale county" & state.abb == "TN" ~ "trousdale county",
name == "nashville and davidson county" & state.abb == "TN" ~ "davidson county",
name == "lafayette city-parish consolidated government" & state.abb == "LA" ~ "lafayette parish",
TRUE ~ name)) %>%
# Step 3: extract geo_id part to map with census
mutate(geo_id = str_extract(geo_id, "US(.*)"),
geo_id = str_remove_all(geo_id, "US"))
####### States
acfrs_state_2020 <- acfrs_general_purpose_20 %>%
filter(str_detect(name, "(state of)|(district of columbia)|(commonwealth)")) %>%
filter(!str_detect(name, "yap|kosrae")) %>%
filter(!str_detect(name, "(iowa single audit)|(puerto rico)")) %>%
mutate(name = str_remove_all(name, "(state of)|(commonwealth of)"),
name = str_trim(name))
# state from census
# census_state <- census_all %>% filter(sumlev == 40) %>%
# select(state.abb, geo_id, population)
# Joining acfrs states & census states: state_gov_2020
state_gov_2020 <- acfrs_state_2020 %>% select(-geo_id) %>%
left_join(census_state, by = c("state.abb")) %>%
select(-government_id)
# create id list to recognize state entities in later years.
state_acfrs_id <- state_gov_2020 %>% select(state.abb, name, id)
state_gov_2020 %>% write.csv("output/state_gov_20.csv")
####### Counties
acfrs_county_20 <- acfrs_general_purpose_20 %>%
filter(grepl("county|municipality|parish", name)) %>% #In Louisiana, counties are called Parishes.
filter(!str_detect(name, "\\(")) # not county. Eg: waverly township (van buren county)
# Alaska counties in census
alaska_county_census <- census_county %>% filter(state.abb == "AK")
# Alaska counties in Acfrs:
alaska_county_acfrs_20 <- acfrs_general_purpose_20 %>%
filter(state.abb == "AK") %>%
filter(str_detect(name, "(borough)|(municipality)$"))
# join acfrs with census population
county_gov_20 <- acfrs_county_20 %>%
rbind(alaska_county_acfrs_20) %>%
# most acfrs_county do not have geo_id --> must join by state.abb and name
left_join(census_county, by = c("state.abb", "name" = "name_census")) %>%
# drop non-county entities
arrange(desc(population)) %>%
drop_na(population)
county_gov_20 %>% write.csv("output/county_gov_20.csv")
# Save county ID to filter in later years
county_acfrs_id_20 <- county_gov_20 %>% select(state.abb, name, id)
########## Incorporated Place & Minor Civil Division
# ACFRs:
acfrs_place_division_20 <- acfrs_general_purpose_20 %>%
# exclude state and county
filter(!id %in% acfrs_state_2020$id) %>%
filter(!id %in% county_gov_20$id) %>% distinct()
# Join Incorporated Place in ACFRs to Census
place_division_gov_20 <- acfrs_place_division_20 %>%
left_join(census_place_division, by= c("geo_id", "state.abb")) %>%
drop_na(population)#%>% distinct(state.abb, name) # only 9146 distinct
place_division_gov_20 %>% write.csv("output/place_division_gov_20.csv")
#### City
city_gov_20 <- place_division_gov_20 %>%
filter(geo_id %in% census_city$geo_id)