-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape_nfl.R
39 lines (37 loc) · 1.17 KB
/
scrape_nfl.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
library(tidyverse)
library(rvest)
library(lubridate)
#' Scrape one season of NFL game results from Pro Football Reference
#'
#' Downloads the games page for `year`, converts every table-body row that is
#' not a repeated header row into one record, and saves the combined table to
#' `data/nfl_<year>.rds` (the `data` directory is created if it is missing).
#'
#' @param year Season to scrape, e.g. `2016`. Must be numeric: games dated
#'   before September are assigned to `year + 1`.
#' @return The value of the `write_rds()` call, i.e. the scraped table with
#'   columns `week`, `date`, `kickoff`, `winner`, `loser` and `box_url`.
scrape_nfl_year <- function(year) {
  # Text of the first node inside `row` matching `css` (NA if nothing matches).
  node_text <- function(row, css) {
    row %>%
      html_node(css) %>%
      html_text()
  }

  # Text of the cell in `row` whose data-stat attribute equals `stat`.
  stat_text <- function(row, stat) {
    node_text(row, paste0("[data-stat=\"", stat, "\"]"))
  }

  # Turn one table row into a named list; non-game rows yield an empty list,
  # which contributes no row to the combined table.
  parse_row <- function(row) {
    raw_date <- stat_text(row, "game_date")
    # Rows without a date cell and the "Playoffs" separator row are not games.
    # The NA check must come first: `NA == "Playoffs"` is NA and breaks `if`.
    if (is.na(raw_date) || raw_date == "Playoffs") {
      return(list())
    }

    # The season year is appended before parsing (the cell text is assumed to
    # hold month and day only). paste0() is used rather than str_glue() so
    # that braces in scraped text are never evaluated as code.
    date <- mdy(paste0(raw_date, ", ", year), quiet = TRUE)
    if (is.na(date)) {
      stop(
        "Could not parse game date \"", raw_date, "\" for season ", year,
        call. = FALSE
      )
    }
    # Games dated before September fall in the following calendar year.
    if (month(date) < 9) {
      year(date) <- year + 1
    }

    list(
      week = node_text(row, "th"),
      date = date,
      kickoff = stat_text(row, "gametime"),
      winner = stat_text(row, "winner"),
      loser = stat_text(row, "loser"),
      box_url = row %>%
        html_node("[data-stat=\"boxscore_word\"] a") %>%
        html_attr("href")
    )
  }

  games <- str_glue("https://www.pro-football-reference.com/years/",
                    year, "/games.htm") %>%
    read_html() %>%
    html_nodes("tbody tr:not(.thead)") %>%
    map_dfr(parse_row)

  # write_rds() does not create missing directories, so make sure the target
  # exists instead of failing after the whole scrape has finished.
  dir.create("data", showWarnings = FALSE)
  write_rds(games, str_glue("data/nfl_", year, ".rds"))
}
# Scrape and store the 2016 season; the 2017 call below is left disabled.
scrape_nfl_year(year = 2016)
# scrape_nfl_year(2017)