enable picking up old search

JBGruber · Feb 17, 2024 · 5b442ff · 5b442ff
1 parent b7eafb9
commit 5b442ff
Show file tree

Hide file tree

Showing 7 changed files with 294 additions and 121 deletions.
diff --git a/R/api_research.r b/R/api_research.r
@@ -106,7 +106,9 @@ tt_search_api <- function(query,
     cli::cli_abort("{.code start_date} needs to be a valid date or a string like, e.g., \"20210102\"")
   }
 
-  if (verbose) cli::cli_progress_step("Making initial request")
+  if (verbose) {
+    cli::cli_progress_step("Making initial request")
+  }
 
   res <- tt_query_request(
     endpoint = "query/",
@@ -119,36 +121,48 @@ tt_search_api <- function(query,
     is_random = is_random,
     token = token
   )
-  if (verbose) cli::cli_progress_done()
   videos <- purrr::pluck(res, "data", "videos")
   the$search_id <- spluck(res, "data", "search_id")
+  the$cursor <- spluck(res, "data", "cursor")
   the$videos <- videos
 
-  page <- 1
-  # res <- jsonlite::read_json("tests/testthat/example_resp.json")
-  while (purrr::pluck(res, "data", "has_more", .default = FALSE) && page < max_pages) {
-    page <- page + 1
-    if (verbose) cli::cli_progress_step("Getting page {page}",
-                                        msg_done = "Got page {page}")
+  the$page <- 1
+
+  if (verbose) cli::cli_progress_bar(
+    format = "{cli::pb_spin} Got {page} page{?s} with {length(videos)} video{?s} {cli::col_silver('[', cli::pb_elapsed, ']')}",
+    format_done = "{cli::col_green(cli::symbol$tick)} Got {page} page{?s} with {length(videos)} video{?s}",
+    .envir = the
+  )
+
+  while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
+    the$page <- the$page + 1
+    the$cursor <- spluck(res, "data", "cursor")
+    if (verbose) cli::cli_progress_update(force = TRUE, .envir = the)
     res <- tt_query_request(
       endpoint = "query/",
       query = query,
       start_date = start_date,
       end_date = end_date,
       fields = fields,
-      cursor = purrr::pluck(res, "data", "cursor", .default = NULL),
-      search_id = purrr::pluck(res, "data", "search_id", .default = NULL),
+      cursor = the$cursor,
+      search_id = the$search_id,
       is_random = is_random,
       token = token
     )
     videos <- c(videos, purrr::pluck(res, "data", "videos"))
-    if (cache) the$videos <- videos
+    if (cache) {
+      the$videos <- videos
+    }
     if (verbose) cli::cli_progress_done()
   }
 
-  if (verbose) cli::cli_progress_step("Parsing data")
+  if (verbose) {
+    cli::cli_progress_done()
+    cli::cli_progress_step("Parsing data")
+  }
   out <- parse_api_search(videos)
 
+  if (verbose) cli::cli_progress_done()
   return(out)
 }
 
@@ -261,23 +275,27 @@ tt_comments_api <- function(video_id,
     fields <- "id,video_id,text,like_count,reply_count,parent_comment_id,create_time"
 
   if (verbose) cli::cli_progress_step("Making initial request")
+
   res <- tt_query_request(
     endpoint = "comment/list/",
     video_id = video_id,
     fields = fields,
     cursor = start_cursor,
     token = token
   )
-  if (verbose) cli::cli_progress_done()
   comments <- purrr::pluck(res, "data", "comments")
   if (cache) the$comments <- comments
-  page <- 1
+  the$page <- 1
 
-  # res <- jsonlite::read_json("tests/testthat/example_resp_comments.json")
-  while (purrr::pluck(res, "data", "has_more", .default = FALSE) && page < max_pages) {
-    page <- page + 1
-    if (verbose) cli::cli_progress_step("Getting page {page}",
-                                        msg_done = "Got page {page}")
+  if (verbose) cli::cli_progress_bar(
+    format = "{cli::pb_spin} Got {page} page{?s} with {length(the$comments)} comment{?s} {cli::col_silver('[', cli::pb_elapsed, ']')}",
+    format_done = "{cli::col_green(cli::symbol$tick)} Got {page} page{?s} with {length(the$comments)} comment{?s}",
+    .envir = the
+  )
+
+  while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
+    the$page <- the$page + 1
+    if (verbose) cli::cli_progress_update(.envir = the)
     res <- tt_query_request(
       endpoint = "comment/list/",
       video_id = video_id,
@@ -290,7 +308,10 @@ tt_comments_api <- function(video_id,
     if (verbose) cli::cli_progress_done()
   }
 
-  if (verbose) cli::cli_progress_step("Parsing data")
+  if (verbose) {
+    cli::cli_progress_done()
+    cli::cli_progress_step("Parsing data")
+  }
   out <- parse_api_comments(comments)
 
   return(out)
@@ -343,7 +364,8 @@ tt_query_request <- function(endpoint,
     httr2::req_retry(
       max_tries = 5L,
       # don't retry when daily quota is reached
-      is_transient = function(resp) httr2::resp_status(resp) != 429,
+      is_transient = function(resp)
+        httr2::resp_status(resp) %in% c(301:399, 401:428, 430:599),
       # increase backoff after each try
       backoff = function(t) t ^ 3
     ) |>

diff --git a/R/last_.r b/R/last_.r
@@ -12,6 +12,7 @@ last_query <- function() {
   out <- try(parse_api_search(q), silent = TRUE)
   if (methods::is(out, "try-error")) {
     attr(q, "search_id") <- the$search_id
+    attr(q, "cursor") <- the$cursor # attach to q (the value returned), not the discarded try-error
     return(q)
   }
   return(out)

diff --git a/R/parse_hidden.r b/R/parse_hidden.r
@@ -32,7 +32,9 @@ parse_video <- function(json_string, video_id) {
       html_status           = html_status,
       music                 = list(spluck(video_data, video_id, "music")),
       challenges            = list(spluck(video_data, video_id, "challenges")),
-      is_classified         = isTRUE(spluck(video_data, video_id, "isContentClassified"))
+      is_classified         = isTRUE(spluck(video_data, video_id, "isContentClassified")),
+      video_status          = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusMsg"),
+      video_status_code     = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusCode")
     ))
   }
 
@@ -66,7 +68,9 @@ parse_video <- function(json_string, video_id) {
       challenges            = list(spluck(video_data, "challenges")),
       is_secret             = isTRUE(spluck(video_data, "secret")),
       is_for_friend         = isTRUE(spluck(video_data, "forFriend")),
-      is_slides             = FALSE
+      is_slides             = FALSE,
+      video_status          = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusMsg"),
+      video_status_code     = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusCode")
     )
     if (identical(out$download_url, "")) {
       out$download_url <- purrr::pluck(video_data, "imagePost", "images", "imageURL", "urlList") |>
@@ -76,7 +80,33 @@ parse_video <- function(json_string, video_id) {
     }
     return(out)
   } else {
-    cli::cli_abort("No video data found")
+    cli::cli_warn("No video data found") # warn first, then return the NA-filled fallback row below
+    return(tibble::tibble(
+      video_id              = video_id,
+      video_url             = video_url,
+      video_timestamp       = NA,
+      video_length          = NA,
+      video_title           = NA,
+      video_locationcreated = NA,
+      video_diggcount       = NA,
+      video_sharecount      = NA,
+      video_commentcount    = NA,
+      video_playcount       = NA,
+      author_id             = NA,
+      author_secuid         = NA,
+      author_username       = NA,
+      author_nickname       = NA,
+      author_bio            = NA,
+      download_url          = NA,
+      html_status           = html_status,
+      music                 = NA,
+      challenges            = NA,
+      is_secret             = NA,
+      is_for_friend         = NA,
+      is_slides             = NA,
+      video_status          = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusMsg"),
+      video_status_code     = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusCode")
+    ))
   }
 
 }

diff --git a/R/parse_research.r b/R/parse_research.r
@@ -21,8 +21,10 @@ parse_api_search <- function(x) {
                          vpluck(x, "id", val = "character"),
                          out$video_id)
 
-  attr(out, "search_id") <- attr(x, "search_id")
   class(out) <- c("tt_results", class(out))
+  attr(out, "search_id") <- the$search_id
+  attr(out, "cursor") <- the$cursor
+
   return(out)
 }
 

diff --git a/tests/testthat/test-tt_videos.R b/tests/testthat/test-tt_videos.R
@@ -19,14 +19,14 @@ test_that("get meta and download", {
 
 
 test_that("parse", {
-  expect_error(parse_video('{"test":1}', video_id = 1L),
-               "No video data found")
+  expect_warning(parse_video('{"test":1}', video_id = 1L),
+                 "No video data found")
   expect_equal(
     dim(parse_video('{"ItemModule":{"test":1}}', video_id = 1L)),
-    c(1L, 16L)
+    c(1L, 18L)
   )
   expect_equal(
     dim(parse_video('{"__DEFAULT_SCOPE__":{"webapp.video-detail":{"itemInfo":{"itemStruct":{"test":1}}}}}', video_id = 1L)),
-    c(1L, 20L)
+    c(1L, 22L)
   )
 })