diff --git a/R/append_values.r b/R/append_values.r index a974113..9bfd493 100644 --- a/R/append_values.r +++ b/R/append_values.r @@ -16,7 +16,7 @@ #' @param force parameter that determines if the variable type should be computed or not #' if force is FALSE, then the function may take more memory #' @examples -#' library(dplyr) +#' library(magrittr) # for %>% #' '{"first": "bob", "last": "jones"}' %>% #' gather_keys() %>% #' append_values_string() diff --git a/R/enter_object.r b/R/enter_object.r index 08033a5..2d2be9e 100644 --- a/R/enter_object.r +++ b/R/enter_object.r @@ -16,7 +16,7 @@ #' @param ... path to filter #' @export #' @examples -#' library(dplyr) +#' library(magrittr) # for %>% #' c('{"name": "bob", "children": ["sally", "george"]}', '{"name": "anne"}') %>% #' spread_values(parent.name = jstring("name")) %>% #' enter_object("children") %>% diff --git a/R/gather_array.r b/R/gather_array.r index 835e090..780fd22 100644 --- a/R/gather_array.r +++ b/R/gather_array.r @@ -23,7 +23,8 @@ #' @return a tbl_json with a new column (column.name) that captures the array #' index and JSON attribute extracted from the array #' @export -#' @examples +#' @examples +#' library(magrittr) # for %>% #' '[1, "a", {"k": "v"}]' %>% gather_array %>% json_types gather_array <- function(x, column.name = "array.index") { diff --git a/R/gather_keys.r b/R/gather_keys.r index d41b123..c397003 100644 --- a/R/gather_keys.r +++ b/R/gather_keys.r @@ -1,10 +1,22 @@ -#' Gathers every key from the top level of the json and stacks them -#' +#' Stack a JSON {"key": value} object +#' +#' Given a JSON key value structure, like {"key1": 1, "key2": 2}, the +#' gather_keys() function duplicates the rows of the tbl_json data.frame for +#' every key, adds a new column (default name "key") to capture the key names, +#' and then dives into the JSON values to enable further manipulation with +#' downstream tidyjson functions. 
+#' +#' This allows you to *enter into* the keys of the objects just like `gather_array` +#' lets you enter elements of the array. +#' #' @param x a tbl_json whose JSON attribute should always be an object #' @param column.name the name to give to the column of key names created #' @return a tbl_json with a new column (column.name) that captures the keys #' and JSON attribute of the associated value data #' @export +#' @examples +#' library(magrittr) # for %>% +#' '{"name": "bob", "age": 32}' %>% gather_keys %>% json_types gather_keys <- function(x, column.name = "key") { if (!is.tbl_json(x)) x <- as.tbl_json(x) diff --git a/R/json_lengths.r b/R/json_lengths.r index f2fe042..5761735 100644 --- a/R/json_lengths.r +++ b/R/json_lengths.r @@ -1,9 +1,19 @@ -#' Add a column that tells the 'length' of the data in the root of the JSON +#' Add a column that contains the length of the JSON data +#' +#' When investigating JSON data it can be helpful to identify the lengths of the +#' JSON objects or arrays, especially when they are 'ragged' across documents. The +#' json_lengths() function adds a column (default name "length") that contains +#' the 'length' of the JSON associated with each row. For objects, this will +#' be equal to the number of keys. For arrays, this will be equal to the length +#' of the array. All scalar values will be of length 1. 
#' #' @param x a tbl_json object #' @param column.name the name to specify for the length column #' @return a tbl_json object with column.name column that tells the length #' @export +#' @examples +#' library(magrittr) # for %>% +#' c('[1, 2, 3]', '{"k1": 1, "k2": 2}', '1', '{}') %>% json_lengths json_lengths <- function(x, column.name = "length") { if (!is.tbl_json(x)) x <- as.tbl_json(x) diff --git a/R/json_types.r b/R/json_types.r index 9040441..7edd0e7 100644 --- a/R/json_types.r +++ b/R/json_types.r @@ -1,9 +1,21 @@ #' Add a column that tells the 'type' of the data in the root of the JSON -#' +#' +#' The function json_types() inspects the JSON associated with +#' each row of the tbl_json data.frame, and adds a new column ("type" by +#' default) that identifies the type according to the +#' JSON standard at http://json.org/. +#' +#' This is particularly useful for inspecting your JSON data types, and can be added +#' after gather_array() (or gather_keys()) to inspect the types of the elements +#' (or values) in arrays (or objects). +#' #' @param x a tbl_json object #' @param column.name the name to specify for the type column #' @return a tbl_json object with column.name column that tells the type #' @export +#' @examples +#' library(magrittr) # for %>% +#' c('{"a": 1}', '[1, 2]', '"a"', '1', 'true', 'null') %>% json_types json_types <- function(x, column.name = "type") { if (!is.tbl_json(x)) x <- as.tbl_json(x) diff --git a/R/spread_values.r b/R/spread_values.r index 22720fd..7bb099b 100644 --- a/R/spread_values.r +++ b/R/spread_values.r @@ -1,9 +1,23 @@ -#' Extracts values from JSON refereced by a sequence of keys +#' Create new columns with JSON values +#' +#' The spread_values() function lets you dive into (potentially nested) JSON +#' objects and extract specific values. spread_values() takes jstring(), +#' jnumber() or jlogical() named function calls as arguments in order to specify +#' the type of the data that should be captured at each desired key location. 
+#' These values can be of varying types at varying depths. +#' #' @param x tbl_json object #' @param ... column=value list where 'column' will be the column name created #' and 'value' must be a call to jstring(), jnumber() or jlogical() specifying #' the path to get the value (and the type implicit in the function name) #' @export +#' @examples +#' library(magrittr) # for %>% +#' '{"name": {"first": "bob", "last": "jones"}, "age": 32}' %>% +#' spread_values( +#' first.name = jstring("name", "first"), +#' age = jnumber("age") +#' ) spread_values <- function(x, ...) { if (!is.tbl_json(x)) x <- as.tbl_json(x) diff --git a/man/append_values.Rd b/man/append_values.Rd index 6f8e8ab..30d556b 100644 --- a/man/append_values.Rd +++ b/man/append_values.Rd @@ -34,7 +34,7 @@ using append_values_string) and *also* any rows where the JSON is still an object or an array. } \examples{ -library(dplyr) +library(magrittr) # for \%>\% '{"first": "bob", "last": "jones"}' \%>\% gather_keys() \%>\% append_values_string() diff --git a/man/enter_object.Rd b/man/enter_object.Rd index 6947125..04f3fdc 100644 --- a/man/enter_object.Rd +++ b/man/enter_object.Rd @@ -26,7 +26,7 @@ enter into. Keep in mind that any rows with JSON that do not contain the key will be discarded by this function. } \examples{ -library(dplyr) +library(magrittr) # for \%>\% c('{"name": "bob", "children": ["sally", "george"]}', '{"name": "anne"}') \%>\% spread_values(parent.name = jstring("name")) \%>\% enter_object("children") \%>\% diff --git a/man/gather_array.Rd b/man/gather_array.Rd index 0be8e46..09c06ab 100644 --- a/man/gather_array.Rd +++ b/man/gather_array.Rd @@ -36,6 +36,7 @@ values are themselves objects or arrays), continue using other tidyjson functions to structure the data as needed. 
} \examples{ +library(magrittr) # for \%>\% '[1, "a", {"k": "v"}]' \%>\% gather_array \%>\% json_types } diff --git a/man/gather_keys.Rd b/man/gather_keys.Rd index a66fcf1..b413f6d 100644 --- a/man/gather_keys.Rd +++ b/man/gather_keys.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/gather_keys.r \name{gather_keys} \alias{gather_keys} -\title{Gathers every key from the top level of the json and stacks them} +\title{Stack a JSON {"key": value} object} \usage{ gather_keys(x, column.name = "key") } @@ -16,6 +16,18 @@ a tbl_json with a new column (column.name) that captures the keys and JSON attribute of the associated value data } \description{ -Gathers every key from the top level of the json and stacks them +Given a JSON key value structure, like {"key1": 1, "key2": 2}, the +gather_keys() function duplicates the rows of the tbl_json data.frame for +every key, adds a new column (default name "key") to capture the key names, +and then dives into the JSON values to enable further manipulation with +downstream tidyjson functions. +} +\details{ +This allows you to *enter into* the keys of the objects just like `gather_array` +lets you enter elements of the array. 
+} +\examples{ +library(magrittr) # for \%>\% +'{"name": "bob", "age": 32}' \%>\% gather_keys \%>\% json_types } diff --git a/man/json_lengths.Rd b/man/json_lengths.Rd index 31c53bc..018c9a3 100644 --- a/man/json_lengths.Rd +++ b/man/json_lengths.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/json_lengths.r \name{json_lengths} \alias{json_lengths} -\title{Add a column that tells the 'length' of the data in the root of the JSON} +\title{Add a column that contains the length of the JSON data} \usage{ json_lengths(x, column.name = "length") } @@ -15,6 +15,15 @@ json_lengths(x, column.name = "length") a tbl_json object with column.name column that tells the length } \description{ -Add a column that tells the 'length' of the data in the root of the JSON +When investigating JSON data it can be helpful to identify the lengths of the +JSON objects or arrays, especially when they are 'ragged' across documents. The +json_lengths() function adds a column (default name "length") that contains +the 'length' of the JSON associated with each row. For objects, this will +be equal to the number of keys. For arrays, this will be equal to the length +of the array. All scalar values will be of length 1. +} +\examples{ +library(magrittr) # for \%>\% +c('[1, 2, 3]', '{"k1": 1, "k2": 2}', '1', '{}') \%>\% json_lengths } diff --git a/man/json_types.Rd b/man/json_types.Rd index 85d93be..d66b89f 100644 --- a/man/json_types.Rd +++ b/man/json_types.Rd @@ -15,6 +15,18 @@ json_types(x, column.name = "type") a tbl_json object with column.name column that tells the type } \description{ -Add a column that tells the 'type' of the data in the root of the JSON +The function json_types() inspects the JSON associated with +each row of the tbl_json data.frame, and adds a new column ("type" by +default) that identifies the type according to the +JSON standard at http://json.org/. 
+} +\details{ +This is particularly useful for inspecting your JSON data types, and can be added +after gather_array() (or gather_keys()) to inspect the types of the elements +(or values) in arrays (or objects). +} +\examples{ +library(magrittr) # for \%>\% +c('{"a": 1}', '[1, 2]', '"a"', '1', 'true', 'null') \%>\% json_types } diff --git a/man/spread_values.Rd b/man/spread_values.Rd index 97f0d87..c65383c 100644 --- a/man/spread_values.Rd +++ b/man/spread_values.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/spread_values.r \name{spread_values} \alias{spread_values} -\title{Extracts values from JSON refereced by a sequence of keys} +\title{Create new columns with JSON values} \usage{ spread_values(x, ...) } @@ -14,6 +14,18 @@ and 'value' must be a call to jstring(), jnumber() or jlogical() specifying the path to get the value (and the type implicit in the function name)} } \description{ -Extracts values from JSON refereced by a sequence of keys +The spread_values() function lets you dive into (potentially nested) JSON +objects and extract specific values. spread_values() takes jstring(), +jnumber() or jlogical() named function calls as arguments in order to specify +the type of the data that should be captured at each desired key location. +These values can be of varying types at varying depths. 
+} +\examples{ +library(magrittr) # for \%>\% +'{"name": {"first": "bob", "last": "jones"}, "age": 32}' \%>\% +  spread_values( +    first.name = jstring("name", "first"), +    age = jnumber("age") +  ) } diff --git a/vignettes/introduction-to-tidyjson.Rmd b/vignettes/introduction-to-tidyjson.Rmd index cb6dc52..07d8efc 100644 --- a/vignettes/introduction-to-tidyjson.Rmd +++ b/vignettes/introduction-to-tidyjson.Rmd @@ -415,7 +415,7 @@ When investigating JSON data it can be helpful to identify the lengths of the JSON objects or arrays, especialy when they are 'ragged' across documents: ```{r} -c('[1, 2, 3]', '{"k1": 1, "k2": 2}', '1') %>% json_lengths +c('[1, 2, 3]', '{"k1": 1, "k2": 2}', '1', '{}') %>% json_lengths ``` ## Strategies