Skip to content

Commit

Permalink
enable picking up old search
Browse files (browse the repository at this point in the history)
  • Loading branch information
JBGruber committed Feb 17, 2024
1 parent b7eafb9 commit 5b442ff
Show file tree
Hide file tree
Showing 7 changed files with 294 additions and 121 deletions.
64 changes: 43 additions & 21 deletions R/api_research.r
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ tt_search_api <- function(query,
cli::cli_abort("{.code start_date} needs to be a valid date or a string like, e.g., \"20210102\"")
}

if (verbose) cli::cli_progress_step("Making initial request")
if (verbose) {
cli::cli_progress_step("Making initial request")
}

res <- tt_query_request(
endpoint = "query/",
Expand All @@ -119,36 +121,48 @@ tt_search_api <- function(query,
is_random = is_random,
token = token
)
if (verbose) cli::cli_progress_done()
videos <- purrr::pluck(res, "data", "videos")
the$search_id <- spluck(res, "data", "search_id")
the$cursor <- spluck(res, "data", "cursor")
the$videos <- videos

page <- 1
# res <- jsonlite::read_json("tests/testthat/example_resp.json")
while (purrr::pluck(res, "data", "has_more", .default = FALSE) && page < max_pages) {
page <- page + 1
if (verbose) cli::cli_progress_step("Getting page {page}",
msg_done = "Got page {page}")
the$page <- 1

if (verbose) cli::cli_progress_bar(
format = "{cli::pb_spin} Got {page} page{?s} with {length(videos)} video{?s} {cli::col_silver('[', cli::pb_elapsed, ']')}",
format_done = "{cli::col_green(cli::symbol$tick)} Got {page} page{?s} with {length(videos)} video{?s}",
.envir = the
)

while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
the$page <- the$page + 1
the$cursor <- spluck(res, "data", "cursor")
if (verbose) cli::cli_progress_update(force = TRUE, .envir = the)
res <- tt_query_request(
endpoint = "query/",
query = query,
start_date = start_date,
end_date = end_date,
fields = fields,
cursor = purrr::pluck(res, "data", "cursor", .default = NULL),
search_id = purrr::pluck(res, "data", "search_id", .default = NULL),
cursor = the$cursor,
search_id = the$search_id,
is_random = is_random,
token = token
)
videos <- c(videos, purrr::pluck(res, "data", "videos"))
if (cache) the$videos <- videos
if (cache) {
the$videos <- videos
}
if (verbose) cli::cli_progress_done()
}

if (verbose) cli::cli_progress_step("Parsing data")
if (verbose) {
cli::cli_progress_done()
cli::cli_progress_step("Parsing data")
}
out <- parse_api_search(videos)

if (verbose) cli::cli_progress_done()
return(out)
}

Expand Down Expand Up @@ -261,23 +275,27 @@ tt_comments_api <- function(video_id,
fields <- "id,video_id,text,like_count,reply_count,parent_comment_id,create_time"

if (verbose) cli::cli_progress_step("Making initial request")

res <- tt_query_request(
endpoint = "comment/list/",
video_id = video_id,
fields = fields,
cursor = start_cursor,
token = token
)
if (verbose) cli::cli_progress_done()
comments <- purrr::pluck(res, "data", "comments")
if (cache) the$comments <- comments
page <- 1
the$page <- 1

# res <- jsonlite::read_json("tests/testthat/example_resp_comments.json")
while (purrr::pluck(res, "data", "has_more", .default = FALSE) && page < max_pages) {
page <- page + 1
if (verbose) cli::cli_progress_step("Getting page {page}",
msg_done = "Got page {page}")
if (verbose) cli::cli_progress_bar(
format = "{cli::pb_spin} Got {page} page{?s} with {length(the$comments)} comment{?s} {cli::col_silver('[', cli::pb_elapsed, ']')}",
format_done = "{cli::col_green(cli::symbol$tick)} Got {page} page{?s} with {length(the$comments)} comment{?s}",
.envir = the
)

while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
the$page <- the$page + 1
if (verbose) cli::cli_progress_update(.envir = the)
res <- tt_query_request(
endpoint = "comment/list/",
video_id = video_id,
Expand All @@ -290,7 +308,10 @@ tt_comments_api <- function(video_id,
if (verbose) cli::cli_progress_done()
}

if (verbose) cli::cli_progress_step("Parsing data")
if (verbose) {
cli::cli_progress_done()
cli::cli_progress_step("Parsing data")
}
out <- parse_api_comments(comments)

return(out)
Expand Down Expand Up @@ -343,7 +364,8 @@ tt_query_request <- function(endpoint,
httr2::req_retry(
max_tries = 5L,
# don't retry when daily quota is reached
is_transient = function(resp) httr2::resp_status(resp) != 429,
is_transient = function(resp)
httr2::resp_status(resp) %in% c(301:399, 401:428, 430:599),
# increase backoff after each try
backoff = function(t) t ^ 3
) |>
Expand Down
1 change: 1 addition & 0 deletions R/last_.r
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ last_query <- function() {
out <- try(parse_api_search(q), silent = TRUE)
if (methods::is(out, "try-error")) {
attr(q, "search_id") <- the$search_id
attr(out, "cursor") <- the$cursor
return(q)
}
return(out)
Expand Down
36 changes: 33 additions & 3 deletions R/parse_hidden.r
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ parse_video <- function(json_string, video_id) {
html_status = html_status,
music = list(spluck(video_data, video_id, "music")),
challenges = list(spluck(video_data, video_id, "challenges")),
is_classified = isTRUE(spluck(video_data, video_id, "isContentClassified"))
is_classified = isTRUE(spluck(video_data, video_id, "isContentClassified")),
video_status = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusMsg"),
video_status_code = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusCode")
))
}

Expand Down Expand Up @@ -66,7 +68,9 @@ parse_video <- function(json_string, video_id) {
challenges = list(spluck(video_data, "challenges")),
is_secret = isTRUE(spluck(video_data, "secret")),
is_for_friend = isTRUE(spluck(video_data, "forFriend")),
is_slides = FALSE
is_slides = FALSE,
video_status = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusMsg"),
video_status_code = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusCode")
)
if (identical(out$download_url, "")) {
out$download_url <- purrr::pluck(video_data, "imagePost", "images", "imageURL", "urlList") |>
Expand All @@ -76,7 +80,33 @@ parse_video <- function(json_string, video_id) {
}
return(out)
} else {
cli::cli_abort("No video data found")
out <- tibble::tibble(
video_id = video_id,
video_url = video_url,
video_timestamp = NA,
video_length = NA,
video_title = NA,
video_locationcreated = NA,
video_diggcount = NA,
video_sharecount = NA,
video_commentcount = NA,
video_playcount = NA,
author_id = NA,
author_secuid = NA,
author_username = NA,
author_nickname = NA,
author_bio = NA,
download_url = NA,
html_status = html_status,
music = NA,
challenges = NA,
is_secret = NA,
is_for_friend = NA,
is_slides = NA,
video_status = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusMsg"),
video_status_code = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusCode")
)
cli::cli_warn("No video data found")
}

}
Expand Down
4 changes: 3 additions & 1 deletion R/parse_research.r
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ parse_api_search <- function(x) {
vpluck(x, "id", val = "character"),
out$video_id)

attr(out, "search_id") <- attr(x, "search_id")
class(out) <- c("tt_results", class(out))
attr(out, "search_id") <- the$search_id
attr(out, "cursor") <- the$cursor

return(out)
}

Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-tt_videos.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ test_that("get meta and download", {


test_that("parse", {
expect_error(parse_video('{"test":1}', video_id = 1L),
"No video data found")
expect_warning(parse_video('{"test":1}', video_id = 1L),
"No video data found")
expect_equal(
dim(parse_video('{"ItemModule":{"test":1}}', video_id = 1L)),
c(1L, 16L)
c(1L, 18L)
)
expect_equal(
dim(parse_video('{"__DEFAULT_SCOPE__":{"webapp.video-detail":{"itemInfo":{"itemStruct":{"test":1}}}}}', video_id = 1L)),
c(1L, 20L)
c(1L, 22L)
)
})
Loading

0 comments on commit 5b442ff

Please sign in to comment.