Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PWHL Scraper #34

Merged
merged 11 commits into from
Feb 22, 2024
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Package: fastRhockey
Title: Functions to Access Premier Hockey Federation and National Hockey League Play by Play Data
Version: 0.6.0
Title: Functions to Access Professional Women's Hockey League and National Hockey League Play by Play Data
Version: 0.7.0
Authors@R: c(
person(given = "Ben", family = "Howell", email = "[email protected]", role = c("aut")),
person(given = "Saiem", family = "Gilani", email = "[email protected]", role = c("cre", "aut")),
person(given = "Alyssa", family = "Longmuir", email = "[email protected]", role = c("ctb"))
)
Description: A utility to scrape and load play-by-play data
and statistics from the Premier Hockey Federation (PHF) <https://www.premierhockeyfederation.com/>, formerly
known as the National Women's Hockey League (NWHL). Additionally, allows access to the National Hockey League's
and statistics from the Professional Women's Hockey League <https://www.thepwhl.com/>, formerly
known as the Premier Hockey Federation (PHF) or National Women's Hockey League (NWHL). Additionally, allows access to the National Hockey League's
stats API <https://www.nhl.com/>.
License: MIT + file LICENSE
URL: https://fastRhockey.sportsdataverse.org/,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ export(phf_standings)
export(phf_team_box)
export(phf_team_roster)
export(phf_team_stats)
export(pwhl_schedule)
export(pwhl_team_roster)
export(pwhl_teams)
export(update_nhl_db)
export(update_phf_db)
import(dplyr)
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# **fastRhockey 0.7.0**

### **PWHL functions added**

* ```pwhl_schedule()``` function added.
* ```pwhl_team_roster()``` function added.
* ```pwhl_teams()``` function added.

# **fastRhockey 0.6.0**

* Improved resiliency for several PHF functions, updates under the hood.
Expand Down
450 changes: 450 additions & 0 deletions R/pwhl_pbp.R

Large diffs are not rendered by default.

111 changes: 111 additions & 0 deletions R/pwhl_schedule.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#' @title **PWHL Schedule**
#' @description PWHL Schedule lookup
#'
#' @param season Season (YYYY) to pull the schedule from, the concluding year in XXXX-YY format
#' @return A data frame with schedule data
#' @import jsonlite
#' @import dplyr
#' @import httr
#' @importFrom glue glue
#' @export
#' @examples
#' \donttest{
#' try(pwhl_schedule(season = 2023))
#' }

pwhl_schedule <- function(season) {

base_url = "https://lscluster.hockeytech.com/feed/index.php?feed=statviewfeed&view=schedule&team=-1&season=1&month=-1&location=homeaway&key=694cfeed58c932ee&client_code=pwhl&site_id=2&league_id=1&division_id=-1&lang=en&callback=angular.callbacks._1"
full_url = base_url

res <- httr::RETRY(
"GET",
full_url
)

res <- res %>%
httr::content(as = "text", encoding = "utf-8")

res <- gsub("angular.callbacks._1\\(", "", res)
res <- gsub("}}]}]}])", "}}]}]}]", res)

r <- res %>%
jsonlite::parse_json()

gm <- r[[1]]$sections[[1]]$data

schedule_data <- data.frame()

tryCatch(
expr = {
for (i in 1:length(gm)) {

if (is.null(gm[[i]]$prop$venue_name$venueUrl)) {
venue <-'TBD'
} else {
venue <- gm[[i]]$prop$venue_name$venueUrl
}

game_info <- data.frame(
"game_id" = c(gm[[i]]$row$game_id),
"game_date" = c(gm[[i]]$row$date_with_day),
"game_status" = c(gm[[i]]$row$game_status),
"home_team" = c(gm[[i]]$row$home_team_city),
"home_team_id" = c(gm[[i]]$prop$home_team_city$teamLink),
"away_team" = c(gm[[i]]$row$visiting_team_city),
"away_team_id" = c(gm[[i]]$prop$visiting_team_city$teamLink),
"home_score" = c(gm[[i]]$row$home_goal_count),
"away_score" = c(gm[[i]]$row$visiting_goal_count),
"venue" = c(gm[[i]]$row$venue_name),
"venue_url" = c(venue)
)

schedule_data <- rbind(
schedule_data,
game_info
)

}

schedule_data <- schedule_data %>%
dplyr::mutate(
winner = dplyr::case_when(
.data$home_score == '' | .data$away_score == "-" ~ '-',
.data$home_score > .data$away_score ~ .data$home_team,
.data$away_score > .data$home_score ~ .data$away_team,
.data$home_score == .data$away_score & .data$home_score != "-" ~ "Tie",
TRUE ~ NA_character_
),
season = season
) %>%
dplyr::select(
c(
"game_id",
"game_date",
"game_status",
"home_team",
"home_team_id",
"away_team",
"away_team_id",
"home_score",
"away_score",
"winner",
"venue",
"venue_url"
)
)
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid season or no schedule data available! Try a season from 2023 onwards!"))

},
warning = function(w) {
},
finally = {
}
)

return(schedule_data)

}

120 changes: 120 additions & 0 deletions R/pwhl_standings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#' @title **PWHL Standings**
#' @description PWHL Standings lookup
#'
#' @param season Season (YYYY) to pull the roster from, the concluding year in XXXX-YY format
#' @param regular Bool for whether to pull regular or pre-season rosters
#' @return A data frame with standings data
#' @import jsonlite
#' @import dplyr
#' @import httr
#' @importFrom glue glue

pwhl_standings <- function(season = 2023, regular = TRUE) {
benhowell71 marked this conversation as resolved.
Show resolved Hide resolved
if (regular) {
season_id <- 1
} else if (! regular) {
season_id <- 2
}

REG_URL = paste0("https://lscluster.hockeytech.com/feed/index.php?feed=statviewfeed&view=teams&groupTeamsBy=league&context=overall&site_id=2&season=", season_id, "&special=false&key=694cfeed58c932ee&client_code=pwhl&league_id=1&division=undefined&sort=points&lang=en&callback=angular.callbacks._b")
URL = paste0("https://lscluster.hockeytech.com/feed/index.php?feed=statviewfeed&view=teams&groupTeamsBy=division&context=overall&site_id=2&season=", season_id, "&special=true&key=694cfeed58c932ee&client_code=pwhl&league_id=1&division=-1&sort=points&lang=en&callback=angular.callbacks._4")
benhowell71 marked this conversation as resolved.
Show resolved Hide resolved
reg_res <- httr::RETRY(
"GET",
REG_URL
benhowell71 marked this conversation as resolved.
Show resolved Hide resolved
)

reg_res <- reg_res %>%
httr::content(as = "text", encoding = "utf-8")

reg_res <- gsub("angular.callbacks._b\\(", "", reg_res)
reg_res <- gsub("}}]}]}])", "}}]}]}]", reg_res)
benhowell71 marked this conversation as resolved.
Show resolved Hide resolved
r_reg <- reg_res %>%
jsonlite::parse_json()

res <- httr::RETRY(
"GET",
URL
)

res <- res %>%
httr::content(as = "text", encoding = "utf-8")

res <- gsub("angular.callbacks._4\\(", "", res)
res <- gsub("}}]}]}])", "}}]}]}]", res)
r <- res %>%
jsonlite::parse_json()

reg_data <- r_reg[[1]]$sections[[1]]$data
data <- r[[1]]$sections[[1]]$data

reg_standings <- data.frame()
standings <- data.frame()
# data[[1]]

# jsonlite::fromJSON(r[[1]]$sections[[1]]$data)
# jsonlite::flatten(data)

tryCatch(
expr = {
for (y in 1:length(reg_data)) {

reg_team_stand <- data.frame(
team_rank = c(reg_data[[y]]$row$rank),
team = c(reg_data[[y]]$row$name),
team_code = c(reg_data[[y]]$row$team_code),
games_played = c(reg_data[[y]]$row$games_played),
points = c(reg_data[[y]]$row$points),
wins = c(reg_data[[y]]$row$regulation_wins),
non_reg_wins = c(reg_data[[y]]$row$non_reg_wins),
losses = c(reg_data[[y]]$row$losses),
non_reg_losses = c(reg_data[[y]]$row$non_reg_losses),
goals_for = c(reg_data[[y]]$row$goals_for),
goals_against = c(reg_data[[y]]$row$goals_against),
games_remaining = c(reg_data[[y]]$row$games_remaining)
)

reg_standings <- dplyr::bind_rows(reg_standings, reg_team_stand)

}


for (y in 1:length(data)) {

team_stand <- data.frame(
# team_rank = c(data[[y]]$row$rank),
team = c(data[[y]]$row$name),
team_code = c(data[[y]]$row$team_code),
# games = c(data[[y]]$row$games_played),
ot_wins = c(data[[y]]$row$ot_wins),
ot_losses = c(data[[y]]$row$ot_losses),
so_wins = c(data[[y]]$row$shootout_wins),
so_losses = c(data[[y]]$row$shootout_losses),
power_play_goals = c(data[[y]]$row$power_play_goals),
power_play_goals_against = c(data[[y]]$row$power_play_goals_against),
power_plays = c(data[[y]]$row$power_plays),
power_play_pct = c(data[[y]]$row$power_play_pct),
short_handed_goals = c(data[[y]]$row$short_handed_goals_for),
short_handed_goals_against = c(data[[y]]$row$short_handed_goals_against),
times_short_handed = c(data[[y]]$row$times_short_handed),
penalty_kill_pct = c(data[[y]]$row$penalty_kill_pct)
)

standings <- dplyr::bind_rows(standings, team_stand)

}

lg_standings <- reg_standings %>%
dplyr::left_join(standings, by = c("team", "team_code"))
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid season or no roster data available! Try a season from 2023 onwards!"))

},
warning = function(w) {
},
finally = {
}
benhowell71 marked this conversation as resolved.
Show resolved Hide resolved
)
return(lg_standings)
benhowell71 marked this conversation as resolved.
Show resolved Hide resolved

}
Loading