-
Notifications
You must be signed in to change notification settings - Fork 8
/
Makefile
183 lines (136 loc) · 5.35 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
###############################################################################
#
# ILLINOIS STATE BOARD OF ELECTION CAMPAIGN FINANCE LOADER
#
# Run `make help` to see commands.
#
# You must have a .env file with:
#
# ILCAMPAIGNCASH_FTP_USER=<your-ftp-user>
# ILCAMPAIGNCASH_FTP_PASSWORD=<your-ftp-password>
# PGHOST=<your-pghost>
# PGPORT=<your-pgport>
# PGDATABASE=<your-database-name>
# PGUSER=<your-db-user>
# PGPASSWORD=<your-db-password>
#
###############################################################################
# Include .env configuration
include .env
export
# Activate Python environment
PIPENV = pipenv run
# Schemas
SCHEMAS = raw public
# Source tables
TABLES = $(basename $(notdir $(wildcard sql/tables/*.sql)))
# Views
VIEWS = $(basename $(notdir $(wildcard sql/views/*.sql)))
##@ Basic usage
.PHONY: all
all: views db/vacuum ## Build database
.PHONY: download
download: $(patsubst %, data/download/%.txt, $(TABLES)) ## Download source data
.PHONY: process
process: $(patsubst %, data/processed/%.csv, $(TABLES)) ## Process source data
.PHONY: load
load: $(patsubst %, db/csv/%, $(TABLES)) ## Process load processed data
.PHONY: views
views: $(patsubst %, db/views/%, $(VIEWS)) ## Create views
.PHONY: help
help: ## Display this help
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z\%\\.\/_-]+:.*?##/ { printf "\033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
##@ Database views
define create_view
(psql -c "\d public.$(subst db/views/,,$@)" > /dev/null 2>&1 && \
echo "view public.$(subst db/views/,,$@) exists") || \
psql -v ON_ERROR_STOP=1 -qX1ef sql/views/$(subst db/views,,$@).sql
endef
.PHONY: db/views/%
db/views/%: sql/views/%.sql load db/schemas/public ## Create view % specified in sql/views/%.sql (will load all data)
$(call create_view)
.PHONY: db/views/Candidate_Elections
db/views/Candidate_Elections: sql/views/Candidate_Elections.sql db/views/Candidates
$(call create_view)
.PHONY: db/views/Committee_Candidate_Links
db/views/Committee_Candidate_Links: db/views/Committees db/views/Candidates
$(call create_view)
.PHONY: db/views/Committee_Officer_Links
db/views/Committee_Officer_Links: db/views/Committees db/views/Officers
$(call create_view)
.PHONY: db/views/Previous_Officers
db/views/Previous_Officers: db/views/Committees
$(call create_view)
.PHONY: db/views/Receipts
db/views/Receipts: db/views/Committees
$(call create_view)
.PHONY: db/views/Expenditures
db/views/Expenditures: db/views/Committees
$(call create_view)
.PHONY: db/views/Condensed_Receipts
db/views/Condensed_Receipts: db/views/Receipts db/views/Most_Recent_Filings
$(call create_view)
.PHONY: db/views/Condensed_Expenditures
db/views/Condensed_Expenditures: db/views/Expenditures db/views/Most_Recent_Filings
$(call create_view)
.PHONY: db/views/Most_Recent_Filings
db/views/Most_Recent_Filings: db/views/Committees db/views/Filed_Docs db/views/D2_Reports
$(call create_view)
##@ Database structure
define create_raw_table
@(psql -c "\d raw.$(subst db/tables/,,$@)" > /dev/null 2>&1 && \
echo "table raw.$(subst db/tables/,,$@) exists") || \
psql -v ON_ERROR_STOP=1 -qX1ef $<
endef
define create_schema
@(psql -c "\dn $(subst db/schemas/,,$@)" | grep $(subst db/schemas/,,$@) > /dev/null 2>&1 && \
echo "schema $(subst db/schemas/,,$@) exists") || \
psql -v ON_ERROR_STOP=1 -qaX1ec "CREATE SCHEMA $(subst db/schemas/,,$@)"
endef
define load_raw_csv
@(psql -Atc "select count(*) from raw.$(subst db/csv/,,$@)" | grep -v -w "0" > /dev/null 2>&1 && \
echo "raw.$(subst db/csv/,,$@) is not empty") || \
psql -v ON_ERROR_STOP=1 -qX1ec "\copy raw.$(subst db/csv/,,$@) from '$(CURDIR)/$<' with delimiter ',' csv header;"
endef
.PHONY: db
db: ## Create database
@(psql -c "SELECT 1" > /dev/null 2>&1 && \
echo "database $(PGDATABASE) exists") || \
createdb -e $(PGDATABASE) -E UTF8 -T template0 --locale=en_US.UTF-8
.PHONY: db/vacuum
db/vacuum: ## Vacuum db
psql -v ON_ERROR_STOP=1 -qec "VACUUM ANALYZE;"
.PHONY: db/schemas
db/schemas: $(patsubst %, db/schemas/%, $(SCHEMAS)) ## Make all schemas
.PHONY: db/schemas/%
db/schemas/%: db # Create schema % (where % is 'raw', etc)
$(call create_schema)
.PHONY: db/tables/%
db/tables/%: sql/tables/%.sql db/schemas/raw # Create table % from sql/tables/%.sql
$(call create_raw_table)
.PHONY: db/csv/%
db/csv/%: data/processed/%.csv db/tables/% ## Load table % from data/processed/%.csv
$(call load_raw_csv)
.PHONY: dropschema/%
dropschema/%: # @TODO wrap in detection
psql -v ON_ERROR_STOP=1 -qX1c "DROP SCHEMA IF EXISTS $* CASCADE;"
.PHONY: dropdb
dropdb: ## Drop database
dropdb --if-exists -e $(PGDATABASE)
##@ Data processing
data/download/%.txt: ## Download %.txt (where % is something like Candidates)
aria2c -x5 -q -d data/download --ftp-user="$(ILCAMPAIGNCASH_FTP_USER)" --ftp-passwd="$(ILCAMPAIGNCASH_FTP_PASSWORD)" ftp://ftp.elections.il.gov/CampDisclDataFiles/$*.txt
data/processed/%.csv: data/download/%.txt ## Convert data/download/%.txt to data/processed/%.csv
$(PIPENV) python processors/clean_isboe_tsv.py $< $* > $@
##@ Maintenance
.PHONY: install
install: ## Install dependencies
pipenv install
.PHONY: dbshell
dbshell: ## Run a database shell
psql
.PHONY: clean
clean: clean/processed clean/download ## Delete downloads and processed data files
.PHONY: clean/%
clean/%: ## Clean data/%
rm -f data/$*/*