From 89b72ca98396cc02fb22734e693e994066bf3ff3 Mon Sep 17 00:00:00 2001 From: atn38-crwr-d07322 Date: Wed, 6 Apr 2022 20:36:28 -0500 Subject: [PATCH] fix: get_attributes() now works and works pretty well! handles all possible numbers of entities as well and should keep IDs consistent. #5 #6 #3 --- R/get_attributes.R | 199 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 166 insertions(+), 33 deletions(-) diff --git a/R/get_attributes.R b/R/get_attributes.R index 291ee83..402585c 100644 --- a/R/get_attributes.R +++ b/R/get_attributes.R @@ -1,53 +1,186 @@ + + + + + + + +#' Title #' +#' @param corpus #' +#' @return +#' @export #' -#' -#' -#' -#' -#' -#' -#' -#' -#' -#' only data table attributes for now - +#' @examples get_attributes <- function(corpus) { - vw_att <- data.frame() + vw_att <- list() for (i in seq_along(corpus)) { - pk <- get_pk(names(corpus)[[i]]) + eml <- corpus[[i]] + pk <- parse_packageId(names(corpus)[[i]]) scope <- pk[["scope"]] id <- pk[["id"]] rev <- pk[["rev"]] - dts <- - handle_one(purrr::compact(corpus[[i]][["dataset"]][["dataTable"]])) + ent_groups <- + purrr::compact(eml[["dataset"]][c("dataTable", + "spatialVector", + "spatialRaster", + "otherEntity", + "view")]) + # exit if no entities found + if (is.null(ent_groups)) + return() + groupdf <- list() + for (j in seq_along(ent_groups)) { + ents <- handle_one(ent_groups[[j]]) - for (j in seq_along(dts)) { - dt <- dts[[j]] + att_list <- lapply(seq_along(ents), function(x) { + ent <- ents[[x]] + if ("attributeList" %in% names(ent)) { + attdf <- + parse_attributeList(x = ent[["attributeList"]])#, + #eml = corpus[[i]])) + n <- + nrow(attdf) + #print(paste(i, j, how_many)) + #print(attdf$attributes) + info <- data.frame( + stringsAsFactors = F, + scope = rep(scope, n), + datasetid = rep(id, n), + rev = rep(rev, n), + entity = rep(paste0(j, x), n), + entitytype = paste0(names(ent_groups)[[j]]) + ) + return(cbind(info, attdf)) + } + }) + groupdf[[j]] <- data.table::rbindlist(att_list, fill = TRUE) + } + vw_att[[i]] <- data.table::rbindlist(groupdf, fill = TRUE) + } + return(data.table::rbindlist(vw_att, fill = TRUE)) +} - if (!is.null(dt[["attributeList"]])) { - attdf <- EML::get_attributes(x = dt[["attributeList"]], - eml = corpus[[i]]) - how_many <- nrow(attdf[["attributes"]]) - # print(how_many) - attdf2 <- data.frame( - stringsAsFactors = F, - scope = rep(scope, how_many), - id = rep(id, how_many), - rev = rep(rev, how_many), - entityposition = rep(j, how_many) - ) - # print(str(attdf[["attributes"]])) - attdf <- dplyr::bind_cols(attdf2,attdf[["attributes"]]) - vw_att <- dplyr::bind_rows(vw_att, attdf) + +#' Title +#' +#' @param eml +#' @param x (list) attributeList EML node +#' +#' @return +#' +#' @examples +parse_attributeList <- function(x, eml = NULL) { + attributeList <- x + ## check to make sure input appears to be an attributeList + if (!("attribute" %in% names(attributeList)) & + is.null(attributeList$references)) { + stop(call. = FALSE, + "Input does not appear to be an attributeList.") + } + ## if the attributeList is referenced, get reference + if (!is.null(attributeList$references)) { + if (is.null(eml)) { + warning( + "The attributeList entered is referenced somewhere else in the eml. ", + "No eml was entered to find the attributes. ", + "Please enter the eml to get better results." + ) + eml <- x + } + + all_attributeLists <- eml_get(eml, "attributeList") + + for (attList in all_attributeLists) { + if (attList$id == attributeList$references) { + attributeList <- attList + break } } + } + attributes <- lapply(attributeList$attribute, parse_attribute) + attributes <- data.table::rbindlist(attributes, fill = T) + ## remove non_fields in attributes + non_fields <- c("enforced", + "exclusive", + "order", + "references", + "scope", + "system", + "typeSystem", + "missingValueCode", + "missingValueCodeExplanation", + "propertyLabel", + "propertyURI", + "valueLabel", + "valueURI") + attributes <- + subset(attributes, select = !(names(attributes) %in% non_fields)) + return(attributes) +} + + + +#' Title +#' +#' @param x (list) attribute EML node +#' +#' @return +#' +#' @examples +parse_attribute <- function(x) { + ## get full attribute list + att <- unlist(x, recursive = TRUE, use.names = TRUE) + measurementScale <- names(x$measurementScale) + domain <- names(x$measurementScale[[measurementScale]]) + + if (length(domain) == 1) { + ## domain == "nonNumericDomain" + domain <- + names(x$measurementScale[[measurementScale]][[domain]]) + } + domain <- domain[grepl("Domain", domain)] + + if (measurementScale == "dateTime" & is.null(domain)) { + domain <- "dateTimeDomain" } - return(vw_att) + + att <- + c(att, measurementScale = measurementScale, domain = domain) + + ## separate factors + att <- att[!grepl("enumeratedDomain", names(att))] + + ## separate methods + att <- att[!grepl("methods", names(att))] + + ## Alter names to be consistent with other tools + names(att) <- gsub("missingValueCode.code", + "missingValueCode", + names(att), + fixed = TRUE) + names(att) <- gsub("standardUnit|customUnit", + "unit", + names(att)) + ## Alter names of annotation label fields for accessibility + names(att) <- gsub("annotation.valueURI.label", + "valueLabel", + names(att), + fixed = TRUE) + names(att) <- gsub("annotation.propertyURI.label", + "propertyLabel", + names(att), + fixed = TRUE) + names(att) <- gsub(".+\\.+", + "", + names(att)) + att <- as.data.frame(t(att), stringsAsFactors = FALSE) + return(att) }