-
Notifications
You must be signed in to change notification settings - Fork 1
/
05-combine-rankings.R
56 lines (51 loc) · 1.23 KB
/
05-combine-rankings.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Attach the random-forest prediction for every row of the test set as a new
# `pred` column.
#
# The frame is ungrouped *before* mutate(): on a grouped frame mutate() works
# group by group, and a prediction vector covering every row would not match
# the group sizes. Predicting on `.` (the already-ungrouped frame) also
# guarantees the predictions line up row-for-row with the data they join to.
rf_preds <-
  moddf_test %>%
  ungroup() %>%
  mutate(
    pred = predict(rfmod, data = .)$predictions
  )
# Merge the `kp` table with the model predictions and derive two rankings:
#   my_rank    - rank of the model prediction (largest `pred` gets rank 1)
#   combo_rank - plain average of the `Rk` column from `kp` and `my_rank`
#
# full_join() keeps teams that appear in only one of the two tables; their
# missing values stay NA (rank() keeps NA as NA by default).
final_df <-
  kp %>%
  full_join(
    rf_preds,
    by = c("Team" = "school")
  ) %>%
  mutate(
    # rank(-pred) orders descending whatever the sign of `pred`. The earlier
    # rank(1 / pred) agreed only while every prediction was positive: a
    # negative prediction has a negative reciprocal and would be ranked first.
    my_rank = rank(-pred),
    # `Rk` is coerced with as.numeric(), as the KenPom comparison plot in this
    # script already does, so the average also works if `Rk` was read as text.
    combo_rank = (as.numeric(Rk) + my_rank) / 2
  )
# Show the first rows of the combined ranking, sorted by combo_rank ascending,
# keeping only the identifying columns and the three rank columns.
final_df %>%
  arrange(combo_rank) %>%
  dplyr::select(Team, Conf, Rk, my_rank, combo_rank) %>%
  head()
# Horizontal bar chart of the model's variable importance.
# data.frame() turns the importance values into a single column that it names
# `importance.rfmod.` (derived from the call); the variable names arrive as
# row names and are moved into a proper `variable` column for plotting.
data.frame(importance(rfmod)) %>%
  rownames_to_column(var = "variable") %>%
  ggplot(
    mapping = aes(
      # order the bars by importance rather than alphabetically
      x = reorder(variable, importance.rfmod.),
      y = importance.rfmod.
    )
  ) +
  geom_col() +
  coord_flip() +
  labs(title = "Which Features Are Important?")
# Horizontal bar chart of the 64 largest predictions, one bar per school,
# ordered by predicted value.
#
# Reuses `rf_preds`, which already carries the `pred` column for the test set,
# instead of running predict() on the same data a second time.
# NOTE(review): 64 is presumably the tournament field size - confirm.
rf_preds %>%
  ungroup() %>%
  arrange(desc(pred)) %>%
  head(64) %>%
  ggplot(aes(x = reorder(school, pred), y = pred)) +
  geom_col() +
  coord_flip() +
  ggtitle(
    "Random Forest Predictions",
    subtitle = "sum(winning margin) for whole tournament"
  )
# Scatter of team labels: `Rk` from the `kp` table on the x axis against the
# model prediction on the y axis, both on log10 scales.
final_df %>%
  ggplot(mapping = aes(x = as.numeric(Rk), y = pred)) +
  geom_text(mapping = aes(label = Team)) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "My Predictions vs Ken Pom",
    subtitle = "Mostly Correlated, some discrepancies"
  )