Skip to content

Commit

Permalink
adding function for creating structural metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
collinschwantes committed Aug 23, 2024
1 parent af93ef8 commit 5f75ab6
Show file tree
Hide file tree
Showing 4 changed files with 228 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(correct_data)
export(create_freetext_log)
export(create_questionnaire_log)
export(create_rules_from_template)
export(create_structural_metadata)
export(create_translation_log)
export(create_validation_log)
export(detect_language)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# ohcleandat 0.3.4

* Sets up a minimal structural metadata framework for tabular datasets.

# ohcleandat 0.3.3

* Adds more control over the function used in `get_precision` and `obfuscate_gps`
Expand Down
121 changes: 121 additions & 0 deletions R/create_structural_metadata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#' Create Structural Metadata from a dataframe
#'
#' This is the metadata that describes the data themselves. This metadata can be
#' generated then joined to pre-existing metadata via field names.
#'
#'
#' @param data Any named object. Expects a table but will work
#' superficially with lists or named vectors.
#'
#' @details
#'
#' The metadata table produced has the following elements
#'
#' `name` = The name of the field. This is taken as is from `data`.
#' `description` = Description of that field. May be provided by controlled vocabulary
#' `units` = Units of measure for that field. May or may not apply
#' `term_uri` = Uniform Resource Identifier for a term from a controlled vocabulary or schema
#' `comments` = Free text providing additional details about the field
#' `primary_key` = `TRUE` or `FALSE`, Uniquely identifies each record in the data
#' `foreign_key` = `TRUE` or `FALSE`, Allows for linkages between data sets. Uniquely identifies
#' records in a different data set
#'
#'
#' @return dataframe with standard metadata requirements
#' @export
#'
#' @examples
#' \dontrun{
#' df <- data.frame(a = 1:10, b = letters[1:10])
#' df_metadata <- create_structural_metadata(df)
#' write.csv(df_metadata,"df_metadata.csv")
#'
#' # Let's pretend we are using a dataset that already has field descriptions.
#' ## in airtable, you can add field descriptions directly
#' ## in the base. We want those exported and properly formatted
#' ## in our ohcleandat workflow
#'
#' base <- "appMyBaseID"
#' table_name <- "My Table"
#'
#' airtable_metadata <- airtabler::air_generate_metadata_from_api(base = base,
#' field_names_to_snake_case = FALSE ) |>
#' dplyr::filter(table_name == {table_name}) |>
#' dplyr::select(field_name,field_desc,primary_key)
#'
#' airtable_df <- airtabler::fetch_all(base = base, table_name = table_name)
#'
#' airtable_df_metadata <- create_structural_metadata(airtable_df)
#'
#' metadata_joined <- dplyr::left_join(airtable_df_metadata,airtable_metadata,
#' by = c("name"="field_name"))
#'
#' metadata_updated <- metadata_joined |>
#' dplyr::mutate(description = field_desc,
#' primary_key = primary_key.y,
#' ) |>
#' dplyr::select(-matches('\\.[xy]|field_desc'))
#'
#' # ODK
#' # get all choices from ODK form
#'
#' dotenv::load_dot_env()
#'
#' ruODK::ru_setup(
#' svc = "https://odk.server.org/v1/projects/5/forms/myproject.svc",
#' un = Sys.getenv("ODK_USERNAME"),
#' pw = Sys.getenv("ODK_PASSWORD"),
#' tz = "GMT",
#' odkc_version = "1.1.2")
#'
#'
#' schema <- ruODK::form_schema_ext()
#'
#' schema$choices_flat <-schema$`choices_english_(en)` |>
#' purrr::map_chr(\(x){
#' if("labels" %in% names(x)){
#' paste(x$labels,collapse = ", ")
#' } else {
#' ""
#' }
#'
#' })
#'
#' data_odk <- ruODK::odata_submission_get()
#' data_odk_rect <- ruODK::odata_submission_rectangle(data_odk)
#' odk_metadata <- create_structural_metadata(data_odk_rect)
#'
#'
#' odk_metadata_joined <- dplyr::left_join(odk_metadata,schema_simple,
#' by = c("name" = "ruodk_name"))
#'
#' odk_metadata_choices <- odk_metadata_joined |>
#' mutate(description = choices_flat) |>
#' select(-choices_flat)
#'
#'
#' }
#'
create_structural_metadata <- function(data) {
  # Build a structural-metadata skeleton with one row per field of `data`.
  #
  # `name` is filled from `names(data)`; every other column is a typed
  # placeholder (`NA`) meant to be completed later, e.g. by joining
  # pre-existing metadata on `name`.

  # Field names are taken as is from `data`. Unnamed objects yield NULL, which
  # is treated as "no fields" so the result is a zero-row table that still
  # carries every metadata column (assigning NULL would drop `name`).
  field_names <- names(data)
  if (is.null(field_names)) {
    field_names <- character()
  }
  n_fields <- length(field_names)

  # Construct the table in a single step. Creating a zero-row tibble first and
  # then assigning the names into it does not work: tibble only recycles
  # size-1 vectors, so a vector of several names is rejected and a single name
  # is recycled down to zero rows.
  tibble::tibble(
    name = field_names,
    description = rep(NA_character_, n_fields),
    units = rep(NA_character_, n_fields),
    term_uri = rep(NA_character_, n_fields),
    comments = rep(NA_character_, n_fields),
    # logical NA: key status is unknown until the user supplies it
    primary_key = rep(NA, n_fields),
    foreign_key = rep(NA, n_fields)
  )
}
104 changes: 104 additions & 0 deletions man/create_structural_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5f75ab6

Please sign in to comment.