diff --git a/.gitignore b/.gitignore index 21275f5..7b16023 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .RData .Rhistory *.swp +inst/doc diff --git a/R/append_values.r b/R/append_values.r index aa6ae38..9bfd493 100644 --- a/R/append_values.r +++ b/R/append_values.r @@ -1,4 +1,13 @@ -#' Append keys to a new column +#' Appends all values with a specified type as a new column +#' +#' The append_values_X functions let you take any remaining JSON and add it as +#' a column X (for X in "string", "number", "logical") insofar as it is of the +#' JSON type specified. +#' +#' Any values that do not conform to the type specified will be NA in the resulting +#' column. This includes other scalar types (e.g., numbers or logicals if you are +#' using append_values_string) and *also* any rows where the JSON is still an +#' object or an array. #' #' @name append_values #' @param x a tbl_json object @@ -6,6 +15,11 @@ #' under #' @param force parameter that determines if the variable type should be computed or not #' if force is FALSE, then the function may take more memory +#' @examples +#' library(magrittr) # for %>% +#' '{"first": "bob", "last": "jones"}' %>% +#' gather_keys() %>% +#' append_values_string() NULL #' Creates the append_values_* functions diff --git a/R/data-worldbank.r b/R/data-worldbank.r index 3f57622..73d97f4 100644 --- a/R/data-worldbank.r +++ b/R/data-worldbank.r @@ -19,7 +19,7 @@ #' name = jstring("project_name"), # Spread name #' region = jstring("regionname") # Spread region #' ) %>% -#' enter_object("sector") %>% # Enter the 'sector' object +#' enter_object("majorsector_percent") %>% # Enter the 'sector' object #' gather_array("sector.index") %>% # Gather the array #' spread_values(sector = jstring("Name")) # Spread the sector name #' diff --git a/R/enter_object.r b/R/enter_object.r index 8afb351..2d2be9e 100644 --- a/R/enter_object.r +++ b/R/enter_object.r @@ -1,8 +1,27 @@ -#' Selects an object by key and filters rows to just those with 
matching keys
+#' Dive into a specific object "key"
+#'
+#' JSON can contain nested objects, such as {"key1": {"key2": [1, 2, 3]}}. The
+#' function enter_object() can be used to access the array nested under "key1"
+#' and "key2". After using enter_object(), all further tidyjson calls happen
+#' inside the referenced object (all other JSON data outside the object
+#' is discarded). If the object doesn't exist for a given row / index, then that
+#' data.frame row will be discarded.
+#'
+#' This is useful when you want to limit your data to just information found in
+#' a specific key. Use the ... to specify a sequence of keys that you want to
+#' enter into. Keep in mind that any rows with JSON that do not contain the key
+#' will be discarded by this function.
#'
#' @param x a tbl_json object
#' @param ... path to filter
#' @export
+#' @examples
+#' library(magrittr) # for %>%
+#' c('{"name": "bob", "children": ["sally", "george"]}', '{"name": "anne"}') %>%
+#'   spread_values(parent.name = jstring("name")) %>%
+#'   enter_object("children") %>%
+#'   gather_array %>%
+#'   append_values_string("children")
enter_object <- function(x, ...) {
if (!is.tbl_json(x)) x <- as.tbl_json(x)
diff --git a/R/gather_array.r b/R/gather_array.r
index 28362e9..780fd22 100644
--- a/R/gather_array.r
+++ b/R/gather_array.r
@@ -1,10 +1,31 @@
-#' Expands a tbl_json to span the indices of a JSON array
+#' Stack a JSON array
+#'
+#' Given a JSON array, such as [1, 2, 3], gather_array will "stack" the array in
+#' the tbl_json data.frame, by replicating each row of the data.frame by the
+#' length of the corresponding JSON array. A new column (by default called
+#' "array.index") will be added to keep track of the referenced position in the
+#' array for each row of the resulting data.frame.
+#'
+#' JSON can contain arrays of data, which can be simple vectors (fixed or varying
+#' length integer, character or logical vectors). But they also often contain
+#' lists of other objects (like a list of purchases for a user). The function
+#' gather_array() takes JSON arrays and duplicates the rows in the data.frame to
+#' correspond to the indices of the array, and puts the elements of
+#' the array into the JSON attribute. This is equivalent to "stacking" the array
+#' in the data.frame, and lets you continue to manipulate the remaining JSON
+#' in the elements of the array. For simple arrays, use append_values_* to
+#' capture all of the values of the array. For more complex arrays (where the
+#' values are themselves objects or arrays), continue using other tidyjson
+#' functions to structure the data as needed.
#'
#' @param x a tbl_json whose JSON attribute should always be an array
#' @param column.name the name to give to the array index column created
#' @return a tbl_json with a new column (column.name) that captures the array
#' index and JSON attribute extracted from the array
#' @export
+#' @examples
+#' library(magrittr) # for %>%
+#' '[1, "a", {"k": "v"}]' %>% gather_array %>% json_types
gather_array <- function(x, column.name = "array.index") {
if (!is.tbl_json(x)) x <- as.tbl_json(x)
diff --git a/R/gather_keys.r b/R/gather_keys.r
index d41b123..c397003 100644
--- a/R/gather_keys.r
+++ b/R/gather_keys.r
@@ -1,10 +1,22 @@
-#' Gathers every key from the top level of the json and stacks them
-#'
+#' Stack a JSON {"key": value} object
+#'
+#' Given a JSON key value structure, like {"key1": 1, "key2": 2}, the
+#' gather_keys() function duplicates the rows of the tbl_json data.frame for
+#' every key, adds a new column (default name "key") to capture the key names,
+#' and then dives into the JSON values to enable further manipulation with
+#' downstream tidyjson functions.
+#'
+#' This allows you to *enter into* the keys of the objects just like `gather_array`
+#' lets you enter elements of the array.
+#'
#' @param x a tbl_json whose JSON attribute should always be an object
#' @param column.name the name to give to the column of key names created
#' @return a tbl_json with a new column (column.name) that captures the keys
#' and JSON attribute of the associated value data
#' @export
+#' @examples
+#' library(magrittr) # for %>%
+#' '{"name": "bob", "age": 32}' %>% gather_keys %>% json_types
gather_keys <- function(x, column.name = "key") {
if (!is.tbl_json(x)) x <- as.tbl_json(x)
diff --git a/R/json_lengths.r b/R/json_lengths.r
index f2fe042..5761735 100644
--- a/R/json_lengths.r
+++ b/R/json_lengths.r
@@ -1,9 +1,19 @@
-#' Add a column that tells the 'length' of the data in the root of the JSON
+#' Add a column that contains the length of the JSON data
+#'
+#' When investigating JSON data it can be helpful to identify the lengths of the
+#' JSON objects or arrays, especially when they are 'ragged' across documents. The
+#' json_lengths() function adds a column (default name "length") that contains
+#' the 'length' of the JSON associated with each row. For objects, this will
+#' be equal to the number of keys. For arrays, this will be equal to the length
+#' of the array. All scalar values will be of length 1.
#'
#' @param x a tbl_json object
#' @param column.name the name to specify for the length column
#' @return a tbl_json object with column.name column that tells the length
#' @export
+#' @examples
+#' library(magrittr) # for %>%
+#' c('[1, 2, 3]', '{"k1": 1, "k2": 2}', '1', '{}') %>% json_lengths
json_lengths <- function(x, column.name = "length") {
if (!is.tbl_json(x)) x <- as.tbl_json(x)
diff --git a/R/json_types.r b/R/json_types.r
index 9040441..7edd0e7 100644
--- a/R/json_types.r
+++ b/R/json_types.r
@@ -1,9 +1,21 @@
#' Add a column that tells the 'type' of the data in the root of the JSON
-#'
+#'
+#' The function json_types() inspects the JSON associated with
+#' each row of the tbl_json data.frame, and adds a new column ("type" by
+#' default) that identifies the type according to the
+#' JSON standard at http://json.org/.
+#'
+#' This is particularly useful for inspecting your JSON data types, and can be added
+#' after gather_array() (or gather_keys()) to inspect the types of the elements
+#' (or values) in arrays (or objects).
+#'
#' @param x a tbl_json object
#' @param column.name the name to specify for the type column
#' @return a tbl_json object with column.name column that tells the type
#' @export
+#' @examples
+#' library(magrittr) # for %>%
+#' c('{"a": 1}', '[1, 2]', '"a"', '1', 'true', 'null') %>% json_types
json_types <- function(x, column.name = "type") {
if (!is.tbl_json(x)) x <- as.tbl_json(x)
diff --git a/R/spread_values.r b/R/spread_values.r
index 22720fd..7bb099b 100644
--- a/R/spread_values.r
+++ b/R/spread_values.r
@@ -1,9 +1,23 @@
-#' Extracts values from JSON refereced by a sequence of keys
+#' Create new columns with JSON values
+#'
+#' The spread_values() function lets you dive into (potentially nested) JSON
+#' objects and extract specific values. spread_values() takes jstring(),
+#' jnumber() or jlogical() named function calls as arguments in order to specify
+#' the type of the data that should be captured at each desired key location.
+#' These values can be of varying types at varying depths. +#' #' @param x tbl_json object #' @param ... column=value list where 'column' will be the column name created #' and 'value' must be a call to jstring(), jnumber() or jlogical() specifying #' the path to get the value (and the type implicit in the function name) #' @export +#' @examples +#' library(magrittr) # for %>% +#' '{"name": {"first": "bob", "last": "jones"}, "age": 32}' %>% +#' spread_values( +#' first.name = jstring("name", "first"), +#' age = jnumber("age") +#' ) spread_values <- function(x, ...) { if (!is.tbl_json(x)) x <- as.tbl_json(x) diff --git a/data/worldbank.rda b/data/worldbank.rda index 2610b63..39accc8 100644 Binary files a/data/worldbank.rda and b/data/worldbank.rda differ diff --git a/man/append_values.Rd b/man/append_values.Rd index 28230e5..30d556b 100644 --- a/man/append_values.Rd +++ b/man/append_values.Rd @@ -5,7 +5,7 @@ \alias{append_values_logical} \alias{append_values_number} \alias{append_values_string} -\title{Append keys to a new column} +\title{Appends all values with a specified type as a new column} \usage{ append_values_string(x, column.name = type, force = TRUE) @@ -23,6 +23,20 @@ under} if force is FALSE, then the function may take more memory} } \description{ -Append keys to a new column +The append_values_X functions let you take any remaining JSON and add it as +a column X (for X in "string", "number", "logical") insofar as it is of the +JSON type specified. +} +\details{ +Any values that do not conform to the type specified will be NA in the resulting +column. This includes other scalar types (e.g., numbers or logicals if you are +using append_values_string) and *also* any rows where the JSON is still an +object or an array. 
+}
+\examples{
+library(magrittr) # for \%>\%
+'{"first": "bob", "last": "jones"}' \%>\%
+  gather_keys() \%>\%
+  append_values_string()
}
diff --git a/man/enter_object.Rd b/man/enter_object.Rd
index 42c44ae..04f3fdc 100644
--- a/man/enter_object.Rd
+++ b/man/enter_object.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/enter_object.r
\name{enter_object}
\alias{enter_object}
-\title{Selects an object by key and filters rows to just those with matching keys}
+\title{Dive into a specific object "key"}
\usage{
enter_object(x, ...)
}
@@ -12,6 +12,25 @@
enter_object(x, ...)
\item{...}{path to filter}
}
\description{
-Selects an object by key and filters rows to just those with matching keys
+JSON can contain nested objects, such as {"key1": {"key2": [1, 2, 3]}}. The
+function enter_object() can be used to access the array nested under "key1"
+and "key2". After using enter_object(), all further tidyjson calls happen
+inside the referenced object (all other JSON data outside the object
+is discarded). If the object doesn't exist for a given row / index, then that
+data.frame row will be discarded.
+}
+\details{
+This is useful when you want to limit your data to just information found in
+a specific key. Use the ... to specify a sequence of keys that you want to
+enter into. Keep in mind that any rows with JSON that do not contain the key
+will be discarded by this function.
+}
+\examples{
+library(magrittr) # for \%>\%
+c('{"name": "bob", "children": ["sally", "george"]}', '{"name": "anne"}') \%>\%
+  spread_values(parent.name = jstring("name")) \%>\%
+  enter_object("children") \%>\%
+  gather_array \%>\%
+  append_values_string("children")
}
diff --git a/man/gather_array.Rd b/man/gather_array.Rd
index 0f0cb3c..09c06ab 100644
--- a/man/gather_array.Rd
+++ b/man/gather_array.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/gather_array.r
\name{gather_array}
\alias{gather_array}
-\title{Expands a tbl_json to span the indices of a JSON array}
+\title{Stack a JSON array}
\usage{
gather_array(x, column.name = "array.index")
}
@@ -16,6 +16,27 @@
a tbl_json with a new column (column.name) that captures the array
index and JSON attribute extracted from the array
}
\description{
-Expands a tbl_json to span the indices of a JSON array
+Given a JSON array, such as [1, 2, 3], gather_array will "stack" the array in
+the tbl_json data.frame, by replicating each row of the data.frame by the
+length of the corresponding JSON array. A new column (by default called
+"array.index") will be added to keep track of the referenced position in the
+array for each row of the resulting data.frame.
+}
+\details{
+JSON can contain arrays of data, which can be simple vectors (fixed or varying
+length integer, character or logical vectors). But they also often contain
+lists of other objects (like a list of purchases for a user). The function
+gather_array() takes JSON arrays and duplicates the rows in the data.frame to
+correspond to the indices of the array, and puts the elements of
+the array into the JSON attribute. This is equivalent to "stacking" the array
+in the data.frame, and lets you continue to manipulate the remaining JSON
+in the elements of the array. For simple arrays, use append_values_* to
+capture all of the values of the array. For more complex arrays (where the
+values are themselves objects or arrays), continue using other tidyjson
+functions to structure the data as needed.
+}
+\examples{
+library(magrittr) # for \%>\%
+'[1, "a", {"k": "v"}]' \%>\% gather_array \%>\% json_types
}
diff --git a/man/gather_keys.Rd b/man/gather_keys.Rd
index a66fcf1..b413f6d 100644
--- a/man/gather_keys.Rd
+++ b/man/gather_keys.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/gather_keys.r
\name{gather_keys}
\alias{gather_keys}
-\title{Gathers every key from the top level of the json and stacks them}
+\title{Stack a JSON {"key": value} object}
\usage{
gather_keys(x, column.name = "key")
}
@@ -16,6 +16,18 @@
a tbl_json with a new column (column.name) that captures the keys
and JSON attribute of the associated value data
}
\description{
-Gathers every key from the top level of the json and stacks them
+Given a JSON key value structure, like {"key1": 1, "key2": 2}, the
+gather_keys() function duplicates the rows of the tbl_json data.frame for
+every key, adds a new column (default name "key") to capture the key names,
+and then dives into the JSON values to enable further manipulation with
+downstream tidyjson functions.
+}
+\details{
+This allows you to *enter into* the keys of the objects just like `gather_array`
+lets you enter elements of the array.
+}
+\examples{
+library(magrittr) # for \%>\%
+'{"name": "bob", "age": 32}' \%>\% gather_keys \%>\% json_types
}
diff --git a/man/json_lengths.Rd b/man/json_lengths.Rd
index 31c53bc..018c9a3 100644
--- a/man/json_lengths.Rd
+++ b/man/json_lengths.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/json_lengths.r
\name{json_lengths}
\alias{json_lengths}
-\title{Add a column that tells the 'length' of the data in the root of the JSON}
+\title{Add a column that contains the length of the JSON data}
\usage{
json_lengths(x, column.name = "length")
}
@@ -15,6 +15,15 @@
json_lengths(x, column.name = "length")
a tbl_json object with column.name column that tells the length
}
\description{
-Add a column that tells the 'length' of the data in the root of the JSON
+When investigating JSON data it can be helpful to identify the lengths of the
+JSON objects or arrays, especially when they are 'ragged' across documents. The
+json_lengths() function adds a column (default name "length") that contains
+the 'length' of the JSON associated with each row. For objects, this will
+be equal to the number of keys. For arrays, this will be equal to the length
+of the array. All scalar values will be of length 1.
+}
+\examples{
+library(magrittr) # for \%>\%
+c('[1, 2, 3]', '{"k1": 1, "k2": 2}', '1', '{}') \%>\% json_lengths
}
diff --git a/man/json_types.Rd b/man/json_types.Rd
index 85d93be..d66b89f 100644
--- a/man/json_types.Rd
+++ b/man/json_types.Rd
@@ -15,6 +15,18 @@
json_types(x, column.name = "type")
a tbl_json object with column.name column that tells the type
}
\description{
-Add a column that tells the 'type' of the data in the root of the JSON
+The function json_types() inspects the JSON associated with
+each row of the tbl_json data.frame, and adds a new column ("type" by
+default) that identifies the type according to the
+JSON standard at http://json.org/.
+}
+\details{
+This is particularly useful for inspecting your JSON data types, and can be added
+after gather_array() (or gather_keys()) to inspect the types of the elements
+(or values) in arrays (or objects).
+}
+\examples{
+library(magrittr) # for \%>\%
+c('{"a": 1}', '[1, 2]', '"a"', '1', 'true', 'null') \%>\% json_types
}
diff --git a/man/spread_values.Rd b/man/spread_values.Rd
index 97f0d87..c65383c 100644
--- a/man/spread_values.Rd
+++ b/man/spread_values.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/spread_values.r
\name{spread_values}
\alias{spread_values}
-\title{Extracts values from JSON refereced by a sequence of keys}
+\title{Create new columns with JSON values}
\usage{
spread_values(x, ...)
}
@@ -14,6 +14,18 @@
and 'value' must be a call to jstring(), jnumber() or jlogical() specifying
the path to get the value (and the type implicit in the function name)}
}
\description{
-Extracts values from JSON refereced by a sequence of keys
+The spread_values() function lets you dive into (potentially nested) JSON
+objects and extract specific values. spread_values() takes jstring(),
+jnumber() or jlogical() named function calls as arguments in order to specify
+the type of the data that should be captured at each desired key location.
+These values can be of varying types at varying depths.
+}
+\examples{
+library(magrittr) # for \%>\%
+'{"name": {"first": "bob", "last": "jones"}, "age": 32}' \%>\%
+  spread_values(
+    first.name = jstring("name", "first"),
+    age = jnumber("age")
+  )
}
diff --git a/man/worldbank.Rd b/man/worldbank.Rd
index 9ad5e6d..bbfa4fa 100644
--- a/man/worldbank.Rd
+++ b/man/worldbank.Rd
@@ -23,7 +23,7 @@
wb_sectors <- worldbank \%>\% # 500 Projects funded by the world bank
name = jstring("project_name"), # Spread name
region = jstring("regionname") # Spread region
) \%>\%
-  enter_object("sector") \%>\% # Enter the 'sector' object
+  enter_object("majorsector_percent") \%>\% # Enter the 'sector' object
gather_array("sector.index") \%>\% # Gather the array
spread_values(sector = jstring("Name")) # Spread the sector name
diff --git a/vignettes/introduction-to-tidyjson.Rmd b/vignettes/introduction-to-tidyjson.Rmd
new file mode 100644
index 0000000..07d8efc
--- /dev/null
+++ b/vignettes/introduction-to-tidyjson.Rmd
@@ -0,0 +1,622 @@
+---
+title: "Introduction to tidyjson"
+author: "Jeremy Stanley"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Introduction to tidyjson}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\usepackage[utf8]{inputenc}
+---
+
+```{r, echo = FALSE}
+knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
+options(dplyr.print_min = 4L, dplyr.print_max = 4L)
+```
+
+[JSON](http://json.org/) (JavaScript Object Notation) is a lightweight and
+flexible data format that is easy for humans to read and for machines to parse.
+JSON has become a common format used in: + +- Public APIs (e.g., [Twitter](https://dev.twitter.com/rest/public)) +- NoSQL databases as a document format (e.g., [MongoDB](https://www.mongodb.org/)) +- Relational databases as a new column type (e.g., [PostgreSQL](http://www.postgresql.org/docs/9.4/static/datatype-json.html)) + +The tidyjson package provides a grammar for turning JSON data into tidy +data.frames that are easy to work with in dplyr, ggplot2 and other modeling and +analytics packages. + +## Why use tidyjson? + +There are already several libraries for working with JSON data in R, such as +[rjson](http://cran.r-project.org/web/packages/rjson/index.html), +[rjsonio](http://cran.r-project.org/web/packages/RJSONIO/index.html) and +[jsonlite](http://cran.r-project.org/web/packages/jsonlite/index.html). Using +these libraries, you can transform JSON into a nested R list. However, working +with nested lists using base R functionality is difficult. + +The jsonlite package goes farther by automatically creating a nested R data.frame. +This is easier to work with than a list, but has two main limitations. First, the +resulting data.frame isn't [tidy](http://vita.had.co.nz/papers/tidy-data.pdf), +and so it can still be difficult to work with. Second, the structure of the +data.frame may vary as the JSON sample changes, which can happen any time you +change the database query or API call that generated the data. + +The tidyjson package takes an alternate approach to structuring JSON data into tidy +data.frames. Similar to [tidyr](http://cran.r-project.org/web/packages/tidyr/index.html), tidyjson builds +a grammar for manipulating JSON into a tidy table structure. 
Tidyjson is based +on the following principles: + +- Leverage other libraries for efficiently parsing JSON ([jsonlite](http://cran.r-project.org/web/packages/jsonlite/index.html)) +- Integrate with pipelines built on [dplyr](http://cran.r-project.org/web/packages/dplyr/index.html) +and the [magrittr](http://cran.r-project.org/web/packages/magrittr/index.html) `%>%` operator +- Turn arbitrarily complex and nested JSON into tidy data.frames that can be joined later +- Guarantee a deterministic data.frame column structure +- Naturally handle 'ragged' arrays and / or objects (varying lengths by document) +- Allow for extraction of data in values *or* key names +- Ensure edge cases are handled correctly (especially empty data) + +## A simple example + +A simple example of how tidyjson works is as follows: + +```{r, message = FALSE} +library(tidyjson) # this library +library(dplyr) # for %>% and other dplyr functions + +# Define a simple JSON array of people +people <- ' +[ + { + "name": "bob", + "age": 32 + }, + { + "name": "susan", + "age": 54 + } +]' + +# Structure the data +people %>% # %>% is the magrittr pipeline operator + gather_array %>% # gather (stack) the array by index + spread_values( # spread (widen) values to widen the data.frame + name = jstring("name"), # value of "name" becomes a character column + age = jnumber("age") # value of "age" becomes a numeric column + ) +``` + +In such a simple example, we can use `fromJSON` in the jsonlite package to do +this much faster: + +```{r, message = FALSE} +library(jsonlite) +jsonlite::fromJSON(people, simplifyDataFrame = TRUE) +``` + +However, if the structure of the JSON data changed, so would the columns output +by `fromJSON`. So even in this simple example there is value in the explicit +structure defined in the tidyjson pipeline above. + +## A more complex example + +The tidyjson package really shines in a more complex example. 
Consider the
following JSON, which describes three purchases of five items made by two
individuals:

```{r}
purch_json <- '
[
  {
    "name": "bob",
    "purchases": [
      {
        "date": "2014/09/13",
        "items": [
          {"name": "shoes", "price": 187},
          {"name": "belt", "price": 35}
        ]
      }
    ]
  },
  {
    "name": "susan",
    "purchases": [
      {
        "date": "2014/10/01",
        "items": [
          {"name": "dress", "price": 58},
          {"name": "bag", "price": 118}
        ]
      },
      {
        "date": "2015/01/03",
        "items": [
          {"name": "shoes", "price": 115}
        ]
      }
    ]
  }
]'
```

Suppose we want to find out how much each person has spent. Using jsonlite, we
can parse the JSON:

```{r}
library(jsonlite)
# Parse the JSON into a data.frame
purch_df <- jsonlite::fromJSON(purch_json, simplifyDataFrame = TRUE)
# Examine results
purch_df
```

This looks deceptively simple, but on inspection with `str()` we see that the
resulting data structure is actually a complex nested data.frame:

```{r}
str(purch_df)
```

This is difficult to work with, and we end up writing code like this:

```{r}
items <- lapply(purch_df$purchases, `[[`, "items")
prices <- lapply(items, lapply, `[[`, "price")
vapply(lapply(prices, unlist), sum, integer(1))
```

Reasoning about code like this is nearly impossible, and further, the relational
structure of the data is lost (we no longer have the name of the user).
We can instead try to use dplyr and the `do{}` operator to get at
the data in the nested data.frames, but this is equally challenging and confusing:

```{r}
purch_df %>% group_by(name) %>% do({
  .$purchases[[1]] %>% rowwise %>% do({
    .$items[, "price", drop = FALSE]
    })
  }) %>% summarize(price = sum(price))
```

Using tidyjson, we can build a pipeline to turn this JSON into a tidy data.frame
where each row corresponds to a purchased item:

```{r}
purch_items <- purch_json %>%
  gather_array %>%                                     # stack the users
  spread_values(person = jstring("name")) %>%          # extract the user name
  enter_object("purchases") %>% gather_array %>%       # stack the purchases
  spread_values(purchase.date = jstring("date")) %>%   # extract the purchase date
  enter_object("items") %>% gather_array %>%           # stack the items
  spread_values(                                       # extract item name and price
    item.name = jstring("name"),
    item.price = jnumber("price")
  ) %>%
  select(person, purchase.date, item.name, item.price) # select only what is needed
```

The resulting data.frame is exactly what we want:

```{r}
purch_items
```

And we can easily continue the pipeline in dplyr to compute derived data:

```{r}
purch_items %>% group_by(person) %>% summarize(spend = sum(item.price))
```

## Data

### Creating a `tbl_json` object

The first step in using tidyjson is to convert your JSON into a `tbl_json` object.
Almost every function in tidyjson accepts either a `tbl_json` object or a character
vector of JSON data as its first parameter, and returns a `tbl_json` object for
downstream use. To facilitate integration with dplyr, `tbl_json` inherits from
`dplyr::tbl`.
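That inheritance can be seen directly in the class vector (a minimal sketch; the exact ordering of classes may vary by version):

```{r}
# A tbl_json is also a dplyr tbl and a data.frame,
# which is why dplyr verbs can be applied to it directly
x <- '{"key": "value"}' %>% as.tbl_json
class(x)
```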
The easiest way to construct a `tbl_json` object is directly from a character
string:

```{r}
# Using a single character string
x <- '{"key": "value"}' %>% as.tbl_json
x
attr(x, "JSON")
```

Behind the scenes, `as.tbl_json` is parsing the JSON string and creating a
data.frame with 1 column, `document.id`, which keeps track of the character
vector position (index) where the JSON data came from. In addition, each
`tbl_json` object has a "JSON" attribute that contains a list of
JSON data of the same length as the number of rows in the `data.frame`.

Often you will have many lines of JSON data that you want to work with,
in which case you can directly convert a character vector to obtain a `tbl_json`
object with the same number of rows:

```{r}
# Using a vector of JSON strings
y <- c('{"key1": "value1"}', '{"key2": "value2"}') %>% as.tbl_json
y
```

This creates a two row `tbl_json` object, where each row corresponds to an index
of the character vector. We can see the underlying parsed JSON:

```{r}
attr(y, "JSON")
```

If your JSON data is already embedded in a data.frame, then you will need
to call `as.tbl_json` directly in order to specify which column contains
the JSON data. Note that the JSON in the data.frame should be character data,
and not a factor. Use `stringsAsFactors = FALSE` when constructing the data.frame
to avoid turning the JSON into a factor.
```{r}
df <- data.frame(
  x = 1:2,
  JSON = c('{"key1": "value1"}', '{"key2": "value2"}'),
  stringsAsFactors = FALSE
)
z <- df %>% as.tbl_json(json.column = "JSON")
z
attr(z, "JSON")
```

### JSON included in the package

The tidyjson package comes with several JSON example datasets:

* `commits`: commit data for the dplyr repo from github API
* `issues`: issue data for the dplyr repo from github API
* `worldbank`: world bank funded projects from
[jsonstudio](http://jsonstudio.com/resources/)
* `companies`: startup company data from
[jsonstudio](http://jsonstudio.com/resources/)

Each dataset has some example tidyjson queries in `help(commits)`,
`help(issues)`, `help(worldbank)` and `help(companies)`.

## Verbs

The rest of tidyjson consists of various verbs that operate on `tbl_json`
objects and return `tbl_json` objects. They are meant to be used in a pipeline
with the `%>%` operator.

Note that these verbs all operate on *both* the underlying data.frame and the
JSON, iteratively moving data from the JSON into the data.frame. Any
modifications of the underlying data.frame outside of these operations
may produce unintended consequences where the data.frame and JSON become out of
sync.

The following table provides a reference of how each verb is used and what
(if any) effect it has on the data.frame rows and columns and on the associated
JSON.
| Verb                | Use                            | Row Effect          | Column Effect    | JSON Effect  |
|:--------------------|:-------------------------------|:--------------------|:-----------------|:-------------|
| `json_types()`      | Identify JSON structure        | none                | type column      | none         |
| `gather_array()`    | Stack JSON array               | Repeats rows        | index column     | enter array  |
| `gather_keys()`     | Stack a {"key": value} object  | Repeats rows        | key column       | enter object |
| `spread_values()`   | Create new columns from values | none                | N value columns  | none         |
| `append_values_X()` | Append all values of a type    | none                | column of type X | none         |
| `enter_object()`    | Dive into an object "key"      | Keeps rows with key | none             | enter object |
| `json_lengths()`    | Identify JSON length           | none                | length column    | none         |

### Identify JSON structure with `json_types()`

One of the first steps you will want to take is to investigate the structure of
your JSON data. The function `json_types()` inspects the JSON associated with
each row of the data.frame, and adds a new column (`type` by default) that
identifies the type according to the [JSON standard](http://json.org/).

```{r}
c('{"a": 1}', '[1, 2]', '"a"', '1', 'true', 'null') %>% json_types
```

This is particularly useful for inspecting your JSON data types, and can be added
after `gather_array()` (or `gather_keys()`) to inspect the types of the elements
(or values) in arrays (or objects).

### Stack a JSON array with `gather_array()`

Arrays are sometimes vectors (fixed or varying length integer, character or
logical vectors). But they also often contain lists of other objects (like
a list of purchases for a user). The function `gather_array()` takes JSON arrays
and duplicates the rows in the data.frame to correspond to the indices of the
array, and puts the elements of the array into the JSON attribute.
This is equivalent to "stacking" the array in the data.frame, and lets you
continue to manipulate the remaining JSON in the elements of the array.

```{r}
'[1, "a", {"k": "v"}]' %>% gather_array %>% json_types
```

This allows you to *enter into* an array and begin processing its elements
with other tidyjson functions. It retains the array.index in case the relative
position of elements in the array is useful information.

### Stack a {"key": value} object with `gather_keys()`

Similar to `gather_array()`, `gather_keys()` takes JSON objects and duplicates
the rows in the data.frame to correspond to the keys of the object, and puts the
values of the object into the JSON attribute.

```{r}
'{"name": "bob", "age": 32}' %>% gather_keys %>% json_types
```

This allows you to *enter into* the keys of the objects just like `gather_array`
lets you enter elements of the array.

### Create new columns with JSON values with `spread_values()`

Adding new columns to your `data.frame` is accomplished with `spread_values()`,
which lets you dive into (potentially nested) JSON objects and extract specific
values. `spread_values()` takes `jstring()`, `jnumber()` or `jlogical()`
function calls as arguments in order to specify the type of the data that should
be captured at each desired key location.

These values can be of varying types at varying depths, e.g.,

```{r}
'{"name": {"first": "bob", "last": "jones"}, "age": 32}' %>%
  spread_values(
    first.name = jstring("name", "first"),
    age = jnumber("age")
  )
```

### Append all values of a specified type with `append_values_X()`

The `append_values_X()` functions let you take the remaining JSON and add it as
a column X (for X in "string", "number", "logical") insofar as it is of the
JSON type specified.
For example:
+
+```{r}
+'{"first": "bob", "last": "jones"}' %>%
+  gather_keys() %>%
+  append_values_string()
+```
+
+Any values that do not conform to the type specified will be NA in the resulting
+column. This includes other scalar types (e.g., numbers or logicals if you are
+using `append_values_string()`) and *also* any rows where the JSON is still an
+object or an array.
+
+### Dive into a specific object "key" with `enter_object()`
+
+For complex JSON structures, you will often need to navigate into nested objects
+in order to continue structuring your data. The function `enter_object()` lets
+you dive into a specific object key in the JSON attribute, so that all further
+tidyjson calls happen inside that object (all other JSON data outside the object
+is discarded). If the object doesn't exist for a given row / index, then that
+data.frame row will be discarded.
+
+```{r}
+c('{"name": "bob", "children": ["sally", "george"]}', '{"name": "anne"}') %>%
+  spread_values(parent.name = jstring("name")) %>%
+  enter_object("children") %>%
+  gather_array %>%
+  append_values_string("children")
+```
+
+This is useful when you want to limit your data to just information found in
+a specific key.
+
+### Identify length of JSON objects with `json_lengths()`
+
+When investigating JSON data it can be helpful to identify the lengths of the
+JSON objects or arrays, especially when they are 'ragged' across documents:
+
+```{r}
+c('[1, 2, 3]', '{"k1": 1, "k2": 2}', '1', '{}') %>% json_lengths
+```
+
+## Strategies
+
+When beginning to work with JSON data, you often don't have easy access to a
+schema describing what is in the JSON. One of the benefits of document oriented
+data structures is that they let developers create data without having to worry
+about defining the schema explicitly.
+
+Thus, the first step is to understand the structure of the JSON. Begin by
+visually inspecting a single record with `jsonlite::prettify()`. 
+
+```{r}
+'{"key": "value", "array": [1, 2, 3]}' %>% prettify
+```
+
+However, for complex data or large JSON structures this can be tedious. Instead,
+use `gather_keys`, `json_types` and `json_lengths` to summarize the data:
+
+```{r}
+'{"key": "value", "array": [1, 2, 3]}' %>%
+  gather_keys %>% json_types %>% json_lengths
+```
+
+You can repeat this as you move through the JSON data using `enter_object()` to
+summarize nested structures as well.
+
+Once you have an understanding of how you'd like the data to be assembled, begin
+creating your tidyjson pipeline. Use `enter_object()` and `gather_array()` to
+navigate the JSON and stack any arrays, and use `spread_values()` to get at
+(potentially nested) key-value pairs along the way.
+
+Before entering any objects, make sure you first use `spread_values()` to
+capture any top level identifiers you might need for analytics, summarization or
+relational uses downstream. If an identifier doesn't exist, then you can always
+fall back on the `document.id` column generated by `as.tbl_json`.
+
+If you encounter data where information is encoded in both keys and values,
+then consider using `gather_keys()` and `append_values_X()` where `X` is the type
+of JSON scalar data you expect in the values.
+
+Note that there are often situations where multiple arrays or objects of
+differing types exist at the same level of the JSON hierarchy. In this
+case, you need to use `enter_object()` to enter each of them in *separate*
+pipelines to create *separate* `data.frames` that can then be joined
+relationally.
+
+Finally, don't forget that once you are done with your JSON tidying, you can
+use [dplyr](http://github.com/hadley/dplyr) to continue manipulating the
+resulting data. `dplyr::filter`, `dplyr::arrange` and `dplyr::mutate` can be
+used and will preserve the JSON attribute for further tidyjson manipulation.
+The same is true for the `[` operator. 
Other `dplyr` functions will destroy
+the JSON attribute, so you will no longer be able to manipulate the JSON data.
+
+### World bank example
+
+Included in the tidyjson package is a `r length(worldbank)` record sample,
+`worldbank`, which contains a subset of the JSON data describing world bank
+funded projects from [jsonstudio](http://jsonstudio.com/resources/).
+
+First, let's take a look at a single record. We can use `jsonlite::prettify` to
+make the JSON easy to read:
+
+```{r}
+library(jsonlite)
+worldbank[1] %>% prettify
+```
+
+An interesting object is "majorsector_percent", which appears to capture the
+distribution of each project by sector. We also have several funding amounts,
+such as "totalamt", which indicate how much money went into each project.
+
+Let's grab the "totalamt", and then gather the array of sectors and their
+percent allocations.
+
+```{r}
+amts <- worldbank %>%
+  spread_values(
+    total = jnumber("totalamt")
+  ) %>%
+  enter_object("majorsector_percent") %>% gather_array %>%
+  spread_values(
+    sector = jstring("Name"),
+    pct = jnumber("Percent")
+  ) %>%
+  mutate(total.m = total / 10^6) %>%
+  select(document.id, sector, total.m, pct) %>%
+  tbl_df
+amts
+```
+
+Let's check that the "pct" column really adds up to 100 by project:
+
+```{r}
+amts %>%
+  group_by(document.id) %>%
+  summarize(pct.total = sum(pct)) %>%
+  group_by(pct.total) %>%
+  tally
+```
+
+It appears to always add up to 100. Let's also check the distribution of
+the total amounts.
+
+```{r}
+summary(amts$total.m)
+```
+
+Many are 0, the mean is $80m and the max is over $1bn. 
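+
+For instance, since the tidied `amts` table is now an ordinary data.frame, we
+can keep working on it with dplyr. Here is a quick sketch that drops the
+zero-amount projects and shows the five largest:
+
+```{r}
+amts %>%
+  filter(total.m > 0) %>%      # drop the zero-amount projects
+  arrange(desc(total.m)) %>%   # largest first
+  head(5)
+```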
+
+Let's now aggregate by sector and compute, on a dollar weighted basis, where
+the money is going:
+
+```{r}
+amts %>%
+  group_by(sector) %>%
+  summarize(
+    spend.portion = sum(total.m * pct / 100)
+  ) %>%
+  ungroup %>%
+  mutate(spend.dist = spend.portion / sum(spend.portion)) %>%
+  arrange(desc(spend.dist))
+```
+
+### Companies example
+
+Also included in the tidyjson package is a `r length(companies)` record sample,
+`companies`, which contains a subset of the JSON data describing startups from
+[jsonstudio](http://jsonstudio.com/resources/).
+
+Instead of using `jsonlite::prettify`, let's quickly summarize the keys using
+tidyjson and visualize the results:
+
+```{r, fig.width = 7, fig.height = 6}
+library(ggplot2)
+key_stats <- companies %>%
+  gather_keys %>% json_types %>% group_by(key, type) %>% tally
+key_stats
+ggplot(key_stats, aes(key, n, fill = type)) +
+  geom_bar(stat = "identity", position = "stack") +
+  coord_flip()
+```
+
+Suppose we are interested in exploring the funding round data. 
Let's examine +it's structure: + +```{r, fig.width = 7, fig.height = 2} +companies %>% + enter_object("funding_rounds") %>% + gather_array %>% + gather_keys %>% json_types %>% group_by(key, type) %>% tally %>% + ggplot(aes(key, n, fill = type)) + + geom_bar(stat = "identity", position = "stack") + + coord_flip() +``` + +Now, referencing the above visualizations, we can structure some of the data for +analysis: + +```{r} +rounds <- companies %>% + spread_values( + id = jstring("_id", "$oid"), + name = jstring("name"), + category = jstring("category_code") + ) %>% + enter_object("funding_rounds") %>% + gather_array %>% + spread_values( + round = jstring("round_code"), + raised = jnumber("raised_amount") + ) +rounds %>% glimpse +``` + +Now we can summarize by category and round how much is raised on average by +round: + +```{r, fig.width = 7, fig.height = 2} +rounds %>% + filter( + !is.na(raised), + round %in% c('a', 'b', 'c'), + category %in% c('enterprise', 'software', 'web') + ) %>% + group_by(category, round) %>% + summarize(raised = mean(raised)) %>% + ggplot(aes(round, raised / 10^6, fill = round)) + + geom_bar(stat = "identity") + + coord_flip() + + labs(y = "Raised (m)") + + facet_grid(. ~ category) +``` + +## Future work + +This package is still a work in progress. Significant additional features we +are contemplating include: + +- Summarizing JSON structures and visualizing them to make working with new JSON +easier +- Keeping the JSON in a parsed C++ data structure, and using rcpp to speed up +the manipulation of JSON +- Push computations to document oriented databases like MongoDB