From 697f101d8829d5150d2ef530ba9e55f79b95e46f Mon Sep 17 00:00:00 2001 From: Django Datama Date: Tue, 12 Jan 2021 13:23:20 +0100 Subject: [PATCH 1/2] New : dryRun parameter for a query. This will output a list containing totalBytesProcessed for Google's dryRun query. REF : https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery --- R/query.R | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/R/query.R b/R/query.R index e094c27..103795d 100644 --- a/R/query.R +++ b/R/query.R @@ -35,7 +35,8 @@ bqr_query <- function(projectId = bqr_get_global_project(), query, maxResults = 1000, useLegacySql = TRUE, - useQueryCache = TRUE){ + useQueryCache = TRUE, + dryRun = FALSE){ check_bq_auth() if(endsWith(query, ".sql")){ @@ -54,7 +55,8 @@ bqr_query <- function(projectId = bqr_get_global_project(), defaultDataset = list( datasetId = datasetId, projectId = projectId - ) + ), + dryRun = dryRun ) body <- rmNullObs(body) @@ -62,13 +64,24 @@ bqr_query <- function(projectId = bqr_get_global_project(), # solve 404? 
the_url <- sprintf("https://www.googleapis.com/bigquery/v2/projects/%s/queries", projectId) - q <- googleAuthR::gar_api_generator(the_url, - "POST", - data_parse_function = parse_bqr_query, - checkTrailingSlash = FALSE) - - data <- try(q(the_body = body, - path_arguments = list(projects = projectId))) + if(dryRun){ + q <- googleAuthR::gar_api_generator(the_url, + "POST", + checkTrailingSlash = FALSE) + data <- try(q(the_body = body, + path_arguments = list(projects = projectId))) + if(!is.error(data)){ + data <- data$content + } + + }else{ + q <- googleAuthR::gar_api_generator(the_url, + "POST", + data_parse_function = parse_bqr_query, + checkTrailingSlash = FALSE) + data <- try(q(the_body = body, + path_arguments = list(projects = projectId))) + } if(is.error(data)) { warning(error.message(data)) From bdd954112da40f0349af885014f3f9b36a8577ba Mon Sep 17 00:00:00 2001 From: Django Datama Date: Wed, 27 Jan 2021 11:36:30 +0100 Subject: [PATCH 2/2] New : timeoutMs parameter. Sometimes a query fails because the request does not wait long enough for the job to complete (the response comes back with jobComplete set to false). See : https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query See : https://stackoverflow.com/questions/40492570/bigquery-query-response-jobcomplete-false --- R/query.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/query.R b/R/query.R index 103795d..394cbbb 100644 --- a/R/query.R +++ b/R/query.R @@ -36,7 +36,8 @@ bqr_query <- function(projectId = bqr_get_global_project(), maxResults = 1000, useLegacySql = TRUE, useQueryCache = TRUE, - dryRun = FALSE){ + dryRun = FALSE, + timeoutMs = 600*1000){ check_bq_auth() if(endsWith(query, ".sql")){ @@ -56,6 +57,7 @@ bqr_query <- function(projectId = bqr_get_global_project(), datasetId = datasetId, projectId = projectId ), + timeoutMs = timeoutMs, dryRun = dryRun )