From 6c5cbefbb821ae8249e924c5c8085cb4c098f728 Mon Sep 17 00:00:00 2001
From: Mark Edmondson
Date: Thu, 11 Aug 2016 12:58:37 +0200
Subject: [PATCH] CRAN 0.2.0 checks

---
 .Rbuildignore            |   3 +-
 DESCRIPTION              |   8 +-
 NEWS.md                  |   8 +-
 cran-comments.md         |  12 ++-
 revdep/checks.rds        | Bin 0 -> 612 bytes
 vignettes/bigQueryR.html | 196 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 211 insertions(+), 16 deletions(-)
 create mode 100644 revdep/checks.rds
 create mode 100644 vignettes/bigQueryR.html

diff --git a/.Rbuildignore b/.Rbuildignore
index ac1b2b1..c26ed3b 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -3,4 +3,5 @@
 ^\.httr-oauth$
 ^\.travis\.yml$
 ^CONTRIBUTING\.md$
-^cran-comments\.md$
\ No newline at end of file
+^cran-comments\.md$
+^revdep
\ No newline at end of file
diff --git a/DESCRIPTION b/DESCRIPTION
index dbb278b..d2266da 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,14 +1,14 @@
 Package: bigQueryR
 Title: Interface with Google BigQuery with Shiny Compatibility
-Version: 0.1.0.9000
+Version: 0.2.0
 Authors@R: c(person("Mark", "Edmondson", email = "r@sunholo.com",
                   role = c("aut", "cre")),
              person("Hadley", "Wickham", , "hadley@rstudio.com", role = "ctb")
              )
-Description: Interface with Google BigQuery,
+Description: Interface with 'Google BigQuery',
     see for more information.
     This package uses 'googleAuthR' so is compatible with similar packages,
-    including Google Cloud Storage () for result extracts.
+    including 'Google Cloud Storage' () for result extracts.
 URL: http://code.markedmondson.me/bigQueryR/
 BugReports: https://github.com/cloudyr/bigQueryR/issues
 License: MIT + file LICENSE
@@ -17,7 +17,7 @@ Depends:
     R (>= 3.2.0)
 Imports:
     googleAuthR (>= 0.3.1),
-    googleCloudStorageR (>= 0.1.0)
+    googleCloudStorageR
 Suggests:
     shiny (>= 0.12.1),
     jsonlite (>= 1.0),
diff --git a/NEWS.md b/NEWS.md
index d4f9866..60637eb 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,12 +1,8 @@
-# bigQueryR 0.1.0.9000
+# bigQueryR 0.2.0 (CRAN)
 
 * Download asynch queries straight to disk via googleCloudStorageR
-
-# bigQueryR 0.1.0 (CRAN)
+# bigQueryR 0.1.0
 
 * Added a `NEWS.md` file to track changes to the package.
 * Initial release
-
-
-
diff --git a/cran-comments.md b/cran-comments.md
index b8f43b1..e56fa8b 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,17 +1,19 @@
 ## Test environments
 * local OS X install, R 3.3.0
-* ubuntu 12.04 (on travis-ci), R 3.3.0
+* ubuntu 12.04 (on travis-ci), R 3.3.1
 * win-builder (devel and release)
 
 ## R CMD check results
 
-0 errors | 0 warnings | 2 notes
+0 errors | 0 warnings | 1 note
 
-* This is a new release.
-* Possibly mis-spelled words in DESCRIPTION: BigQuery - this is spelt correctly.
+* Possibly mis-spelled words in DESCRIPTION:
+  BigQuery (2:30)
+
+This is spelt correctly.
 
 ## Reverse dependencies
 
-This is a new release, so there are no reverse dependencies.
+googleAnalyticsR is a reverse dependency; when checked, it had 0 errors.
diff --git a/revdep/checks.rds b/revdep/checks.rds
new file mode 100644
index 0000000000000000000000000000000000000000..1046961820087af62d0816794f887848a20f28af
GIT binary patch
[612 bytes of binary patch data omitted]

diff --git a/vignettes/bigQueryR.html b/vignettes/bigQueryR.html
new file mode 100644
[rendered HTML vignette added; its readable content follows]

# bigQueryR

Mark Edmondson

2016-08-11

## Introduction

This is a package for interacting with BigQuery from within R.

You may instead want to use bigrquery, which is more developed and integrates with dplyr and friends. Some functions from bigrquery are used in this package.

## Why this package then?

This package uses googleAuthR as its backend, so it has Shiny support and is compatible with other googleAuthR-dependent packages.

It also supports data extracts to Google Cloud Storage, meaning you can download data and make the download URL available to a user via their Google email. If you run a normal query with over 100,000 results, it hangs and errors.

An example of a BigQuery Shiny app running OAuth2 is the BigQuery Visualiser.

## Authentication

Authentication works as in other googleAuthR libraries:

```r
library(bigQueryR)

## this will open your browser
## authenticate with an email that has access to the BigQuery project you need
bqr_auth()

## to re-authenticate under a new user
bqr_auth(new_user = TRUE)
```

If you are authenticating under several APIs via googleAuthR, then use gar_auth() instead, with the appropriate scopes set.
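
A minimal sketch of what that looks like, assuming you want both the BigQuery and Cloud Storage scopes (the exact scope strings you need depend on your project):

```r
library(googleAuthR)

## set the scopes before authenticating
options(googleAuthR.scopes.selected =
          c("https://www.googleapis.com/auth/bigquery",
            "https://www.googleapis.com/auth/devstorage.full_control"))

## authenticate once across the combined scopes
gar_auth()
```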

You can also use service-to-service JSON files and multi-user authentication under Shiny; see the googleAuthR readme for details.
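
As an illustrative sketch, a service-to-service login via googleAuthR might look like the below (the JSON file path is a placeholder for a service account key downloaded from the Google API Console):

```r
library(googleAuthR)

## placeholder path to your service account JSON key
gar_auth_service(json_file = "path/to/service-account-key.json")
```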

## Listing BigQuery meta data

Various functions list what is in your BigQuery account:

```r
library(bigQueryR)

## this will open your browser
## authenticate with an email that has access to the BigQuery project you need
bqr_auth()

## to re-authenticate under a new user
bqr_auth(new_user = TRUE)

## get projects
projects <- bqr_list_projects()

my_project <- projects[1]

## for the first project, get datasets
datasets <- bqr_list_datasets(my_project)

my_dataset <- datasets[1]

## list tables in the dataset
tables <- bqr_list_tables(my_project, my_dataset)

my_table <- tables[1]

## get metadata for a table
meta_table <- bqr_table_meta(my_project, my_dataset, my_table)
```

## Simple Queries

You can pass in queries that return under ~100,000 rows of results using this command:

```r
bqr_query("big-query-r", "samples",
          "SELECT COUNT(repository.url) FROM [publicdata:samples.github_nested]")
```
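
The results come back as an R data.frame, so a quick usage sketch is simply:

```r
## run the query and inspect the returned data.frame
result <- bqr_query("big-query-r", "samples",
                    "SELECT COUNT(repository.url) FROM [publicdata:samples.github_nested]")
str(result)
```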

More rows than that and the API starts to hang, and you are limited by your download bandwidth.

## Asynchronous Queries

For bigger queries, asynchronous queries save the results to another BigQuery table. You can check the progress of the job via bqr_get_job().

```r
library(bigQueryR)

## auth with a project that has at least BigQuery and Google Cloud Storage scope
bqr_auth()

## make a big query
job <- bqr_query_asynch("your_project",
                        "your_dataset",
                        "SELECT * FROM blah LIMIT 9999999",
                        destinationTableId = "bigResultTable")

## poll the job to check its status
## it is done when job$status$state == "DONE"
bqr_get_job("your_project", job$jobReference$jobId)

## once done, the query results are in "bigResultTable"
```
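
A minimal polling sketch, reusing the job object from above (the five-second wait is an arbitrary choice):

```r
## poll until the asynchronous job is done
job_status <- bqr_get_job("your_project", job$jobReference$jobId)

while(job_status$status$state != "DONE"){
  Sys.sleep(5)
  job_status <- bqr_get_job("your_project", job$jobReference$jobId)
}
```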

You may now want to download this data. For large datasets, this is best done by extracting the BigQuery result to Google Cloud Storage, then downloading the data from there.

You can create a bucket in Google Cloud Storage via the web interface (see https://cloud.google.com/storage/docs/cloud-console), or via library(googleCloudStorageR).
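
For instance, a sketch of creating the bucket with googleCloudStorageR (assuming gcs_create_bucket() is available in your installed version; the bucket and project names are placeholders):

```r
library(googleCloudStorageR)

## authenticate with Cloud Storage scope
gcs_auth()

## create a bucket in your project to hold the extracts
gcs_create_bucket("your_cloud_storage_bucket_name",
                  projectId = "your_project")
```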

Once created, you can extract your data via the below:

```r
## create the data extract from BigQuery to Cloud Storage
job_extract <- bqr_extract_data("your_project",
                                "your_dataset",
                                "bigResultTable",
                                "your_cloud_storage_bucket_name")

## poll the extract job to check its status
## it is done when job$status$state == "DONE"
bqr_get_job("your_project", job_extract$jobReference$jobId)

## to download via a URL rather than logging in via the Google Cloud Storage interface:
## use an email that is Google-account enabled
## requires scopes:
##   https://www.googleapis.com/auth/devstorage.full_control
##   https://www.googleapis.com/auth/cloud-platform
## set via options("bigQueryR.scopes") and re-authenticate if needed

download_url <- bqr_grant_extract_access(job_extract, "your@email.com")

## download_url may hold multiple URLs if the data is > 1GB
#> [1] "https://storage.cloud.google.com/big-query-r-extracts/extract-20160311112410-000000000000.csv"
#> [2] "https://storage.cloud.google.com/big-query-r-extracts/extract-20160311112410-000000000001.csv"
#> [3] "https://storage.cloud.google.com/big-query-r-extracts/extract-20160311112410-000000000002.csv"
```
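
Alternatively, a sketch of pulling an extract file straight down with googleCloudStorageR (the object and bucket names echo the hypothetical ones above):

```r
library(googleCloudStorageR)

## see what the extract job wrote to the bucket
objects <- gcs_list_objects("your_cloud_storage_bucket_name")

## save one extract file to disk
gcs_get_object("extract-20160311112410-000000000000.csv",
               bucket = "your_cloud_storage_bucket_name",
               saveToDisk = "extract.csv")
```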