-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot-berlininhabitants.R
78 lines (63 loc) · 2.45 KB
/
plot-berlininhabitants.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
library(sqldf)
library(RPostgreSQL)
# data from csv file, read into sqlite by R, fetch data by sql statements on data frame
options(sqldf.driver = "SQLite")
file <- 'berlininhabitants-2011.csv'
csv <- read.csv(file, header = TRUE, sep = ';')
colnames(csv) <- c("official_district", "district", "gender", "nationality", "age", "quantity")
# list female percentages for every district:
by_women <- sqldf("SELECT
district, round( cast(
sum(case when gender = 'f' then quantity end) AS real)/sum(quantity)*100, 2)
AS percent, sum(quantity)
FROM
csv
GROUP BY
district
ORDER BY
percent
DESC LIMIT 10")
par( mar = c(5, 5, 5, 5) )
women_plot <- barplot( c(by_women$percent),
main = "district by women",
horiz = TRUE, names.arg = by_women$district,
col = heat.colors(10),
space = 0.1,
cex.axis = 0.6, cex = 0.6,
las = 1, xlim = c(0, 100)
)
text(5, women_plot,
labels = paste( by_women$percent, '%' ),
adj = c(0, 0.6), cex = 0.7)
# fetch data from postgres database
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "berlindata", user = "postgres", host = "10.13.2.55")
# is indeed Prenzlauer Berg the district with the most children implying AS many parents AS well?
res <- dbSendQuery(con,
"SELECT
district, to_char( cast(
sum(case when age_high = '5' then quantity end) AS decimal)/sum(quantity)*100, 'FM990D99')
AS percent, sum(quantity)
FROM
inhabitants
GROUP BY
district
ORDER BY
percent
DESC LIMIT 10;"
)
children_by_district <- fetch(res, n = -1)
par( mar = c(5, 7, 5, 5) )
children_plot <- barplot( c(children_by_district$percent),
main = "district by children",
horiz = TRUE,
names.arg = children_by_district$district,
col = heat.colors(10),
space = 0.1, cex.axis = 0.6, cex = 0.6,
las = 1, xlim = c(0, 100)
)
text(10, children_plot,
labels = paste( children_by_district$percent, '%' ),
adj = c(0, 0.6), cex = 0.7)
dbDisconnect(con)
dbUnloadDriver(drv)