-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: get_attributes() now works and works pretty well! handles all po…
- Loading branch information
Showing
1 changed file
with
166 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,53 +1,186 @@ | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
#' Title | ||
#' | ||
#' @param corpus | ||
#' | ||
#' @return | ||
#' @export | ||
#' | ||
#' | ||
#' | ||
#' | ||
#' | ||
#' | ||
#' | ||
#' | ||
#' | ||
#' | ||
#' only data table attributes for now | ||
|
||
#' @examples | ||
get_attributes <- function(corpus) { | ||
vw_att <- data.frame() | ||
vw_att <- list() | ||
|
||
for (i in seq_along(corpus)) { | ||
pk <- get_pk(names(corpus)[[i]]) | ||
eml <- corpus[[i]] | ||
pk <- parse_packageId(names(corpus)[[i]]) | ||
scope <- pk[["scope"]] | ||
id <- pk[["id"]] | ||
rev <- pk[["rev"]] | ||
|
||
dts <- | ||
handle_one(purrr::compact(corpus[[i]][["dataset"]][["dataTable"]])) | ||
ent_groups <- | ||
purrr::compact(eml[["dataset"]][c("dataTable", | ||
"spatialVector", | ||
"spatialRaster", | ||
"otherEntity", | ||
"view")]) | ||
# exit if no entities found | ||
if (is.null(ent_groups)) | ||
return() | ||
groupdf <- list() | ||
for (j in seq_along(ent_groups)) { | ||
ents <- handle_one(ent_groups[[j]]) | ||
|
||
for (j in seq_along(dts)) { | ||
dt <- dts[[j]] | ||
att_list <- lapply(seq_along(ents), function(x) { | ||
ent <- ents[[x]] | ||
if ("attributeList" %in% names(ent)) { | ||
attdf <- | ||
parse_attributeList(x = ent[["attributeList"]])#, | ||
#eml = corpus[[i]])) | ||
n <- | ||
nrow(attdf) | ||
#print(paste(i, j, how_many)) | ||
#print(attdf$attributes) | ||
info <- data.frame( | ||
stringsAsFactors = F, | ||
scope = rep(scope, n), | ||
datasetid = rep(id, n), | ||
rev = rep(rev, n), | ||
entity = rep(paste0(j, x), n), | ||
entitytype = paste0(names(ent_groups)[[j]]) | ||
) | ||
return(cbind(info, attdf)) | ||
} | ||
}) | ||
groupdf[[j]] <- data.table::rbindlist(att_list, fill = TRUE) | ||
} | ||
vw_att[[i]] <- data.table::rbindlist(groupdf, fill = TRUE) | ||
} | ||
return(data.table::rbindlist(vw_att, fill = TRUE)) | ||
} | ||
|
||
if (!is.null(dt[["attributeList"]])) { | ||
attdf <- EML::get_attributes(x = dt[["attributeList"]], | ||
eml = corpus[[i]]) | ||
how_many <- nrow(attdf[["attributes"]]) | ||
# print(how_many) | ||
attdf2 <- data.frame( | ||
stringsAsFactors = F, | ||
scope = rep(scope, how_many), | ||
id = rep(id, how_many), | ||
rev = rep(rev, how_many), | ||
entityposition = rep(j, how_many) | ||
) | ||
|
||
# print(str(attdf[["attributes"]])) | ||
|
||
attdf <- dplyr::bind_cols(attdf2,attdf[["attributes"]]) | ||
|
||
vw_att <- dplyr::bind_rows(vw_att, attdf) | ||
|
||
#' Title | ||
#' | ||
#' @param eml | ||
#' @param x (list) attributeList EML node | ||
#' | ||
#' @return | ||
#' | ||
#' @examples | ||
parse_attributeList <- function(x, eml = NULL) { | ||
attributeList <- x | ||
## check to make sure input appears to be an attributeList | ||
if (!("attribute" %in% names(attributeList)) & | ||
is.null(attributeList$references)) { | ||
stop(call. = FALSE, | ||
"Input does not appear to be an attributeList.") | ||
} | ||
## if the attributeList is referenced, get reference | ||
if (!is.null(attributeList$references)) { | ||
if (is.null(eml)) { | ||
warning( | ||
"The attributeList entered is referenced somewhere else in the eml. ", | ||
"No eml was entered to find the attributes. ", | ||
"Please enter the eml to get better results." | ||
) | ||
eml <- x | ||
} | ||
|
||
all_attributeLists <- eml_get(eml, "attributeList") | ||
|
||
for (attList in all_attributeLists) { | ||
if (attList$id == attributeList$references) { | ||
attributeList <- attList | ||
break | ||
} | ||
} | ||
} | ||
attributes <- lapply(attributeList$attribute, parse_attribute) | ||
attributes <- data.table::rbindlist(attributes, fill = T) | ||
## remove non_fields in attributes | ||
non_fields <- c("enforced", | ||
"exclusive", | ||
"order", | ||
"references", | ||
"scope", | ||
"system", | ||
"typeSystem", | ||
"missingValueCode", | ||
"missingValueCodeExplanation", | ||
"propertyLabel", | ||
"propertyURI", | ||
"valueLabel", | ||
"valueURI") | ||
attributes <- | ||
subset(attributes, select = !(names(attributes) %in% non_fields)) | ||
return(attributes) | ||
} | ||
|
||
|
||
|
||
|
||
#' Title | ||
#' | ||
#' @param x (list) attribute EML node | ||
#' | ||
#' @return | ||
#' | ||
#' @examples | ||
parse_attribute <- function(x) { | ||
## get full attribute list | ||
att <- unlist(x, recursive = TRUE, use.names = TRUE) | ||
measurementScale <- names(x$measurementScale) | ||
domain <- names(x$measurementScale[[measurementScale]]) | ||
|
||
if (length(domain) == 1) { | ||
## domain == "nonNumericDomain" | ||
domain <- | ||
names(x$measurementScale[[measurementScale]][[domain]]) | ||
} | ||
domain <- domain[grepl("Domain", domain)] | ||
|
||
if (measurementScale == "dateTime" & is.null(domain)) { | ||
domain <- "dateTimeDomain" | ||
} | ||
return(vw_att) | ||
|
||
att <- | ||
c(att, measurementScale = measurementScale, domain = domain) | ||
|
||
## separate factors | ||
att <- att[!grepl("enumeratedDomain", names(att))] | ||
|
||
## separate methods | ||
att <- att[!grepl("methods", names(att))] | ||
|
||
## Alter names to be consistent with other tools | ||
names(att) <- gsub("missingValueCode.code", | ||
"missingValueCode", | ||
names(att), | ||
fixed = TRUE) | ||
names(att) <- gsub("standardUnit|customUnit", | ||
"unit", | ||
names(att)) | ||
## Alter names of annotation label fields for accessibility | ||
names(att) <- gsub("annotation.valueURI.label", | ||
"valueLabel", | ||
names(att), | ||
fixed = TRUE) | ||
names(att) <- gsub("annotation.propertyURI.label", | ||
"propertyLabel", | ||
names(att), | ||
fixed = TRUE) | ||
names(att) <- gsub(".+\\.+", | ||
"", | ||
names(att)) | ||
att <- as.data.frame(t(att), stringsAsFactors = FALSE) | ||
return(att) | ||
} |