-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path04-model-tourney-sums.R
48 lines (41 loc) · 1.28 KB
/
04-model-tourney-sums.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Per-team tournament summary built from `tourney_data`.
# For each (year, winning team, winning seed) combination this yields:
#   sum_score_diff - total of (winner_score - loser_score) over the group's rows
#   ngames         - number of rows in the group
# The winning-team column is exposed as `school` in the result.
tourney_score_diff_sums <-
  tourney_data %>%
  group_by(year, school = winner_team, winner_seed) %>%
  summarise(
    sum_score_diff = sum(winner_score - loser_score),
    ngames = n()
  ) %>%
  ungroup()
# Prepare the statistics table (`stats_df`) for modelling:
#   1. coerce `year` to numeric,
#   2. drop columns whose names start with "_NA..",
#   3. keep only rows where `Overall_SRS` is present,
#   4. drop every remaining column that contains at least one NA.
stats_df2 <-
  stats_df %>%
  mutate(year = as.numeric(year)) %>%
  dplyr::select(-starts_with("_NA..")) %>%
  filter(!is.na(Overall_SRS)) %>%
  # `funs()` is deprecated in dplyr; a `where()` predicate is the supported
  # way to select columns by their contents.
  dplyr::select(where(~ !anyNA(.x)))

# Normalise the column names. The replacements are applied strictly in this
# order: the leading-underscore rule is anchored with `^`, so it only sees an
# underscore as the first character after whitespace and dots have already
# been removed.
# NOTE(review): distinct original names could collapse to the same cleaned
# name; nothing here checks for duplicates.
names(stats_df2) <-
  names(stats_df2) %>%
  str_replace_all("\\%", "pct") %>% # "%"  -> "pct"
  str_replace_all("\\s+", "") %>%   # remove whitespace runs
  str_replace_all("\\.", "") %>%    # remove literal dots
  str_replace_all("^\\_", "") %>%   # remove one leading underscore
  str_replace_all("\\-", "")        # remove hyphens
# Modelling frame: the cleaned statistics with the tournament summary attached,
# reduced to the columns used downstream.
# Rows of `stats_df2` without a matching tournament summary are kept by the
# left join and get NA in `ngames` / `sum_score_diff`.
moddf <-
  stats_df2 %>%
  # Join keys are stated explicitly instead of relying on whatever column names
  # the two tables happen to share.
  # NOTE(review): assumes `year` and `school` are the only intended keys
  # (the summary table is grouped by year and its team column is renamed to
  # `school`) - confirm against the columns of `stats_df2`.
  left_join(tourney_score_diff_sums, by = c("year", "school")) %>%
  dplyr::select(
    year, school, ngames, made_it,
    Overall_SRS, Overall_SOS,
    sum_score_diff,
    contains("pct")
  )
# Training set: rows of `moddf` with 2010 < year <= 2019, `made_it == 1`, and
# a non-missing `ngames`. Any remaining NA in `sum_score_diff` is replaced
# with 0.
moddf_train <-
  moddf %>%
  filter(year > 2010 & year <= 2019) %>%
  filter(made_it == 1 & !is.na(ngames)) %>%
  replace_na(list(sum_score_diff = 0))
# Hold-out set: rows of `moddf` for year 2021 with `made_it == 1`.
# Unlike the training set, rows with missing `ngames` are not filtered out and
# `sum_score_diff` is left as-is.
moddf_test <- filter(moddf, year == 2021 & made_it == 1)
# Random forest (500 trees, impurity-based variable importance) fitted on
# `moddf_train`. The response is `sum_score_diff`; the predictors are all other
# columns except `ngames` and `year`.
# NOTE(review): `.` therefore also pulls in `school` and `made_it` as
# predictors - confirm that is intended.
rfmod <- ranger::ranger(
  formula = sum_score_diff ~ . -ngames -year,
  data = moddf_train,
  num.trees = 500,
  importance = "impurity"
)